libdap Updated for version 3.20.8
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPConnect.cc
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2002,2003 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26
27#include "config.h"
28
29#ifdef HAVE_UNISTD_H
30#include <unistd.h>
31#endif
32
33#include <sys/stat.h>
34
35#ifdef WIN32
36#include <io.h>
37#endif
38
39#include <string>
40#include <vector>
41#include <functional>
42#include <algorithm>
43#include <sstream>
44#include <fstream>
45#include <iterator>
46#include <cstdlib>
47#include <cstring>
48#include <cerrno>
49
50//#define DODS_DEBUG2
51//#define HTTP_TRACE
52//#define DODS_DEBUG
53
54#undef USE_GETENV
55
56
57#include "debug.h"
58#include "mime_util.h"
59#include "media_types.h"
60#include "GNURegex.h"
61#include "HTTPCache.h"
62#include "HTTPConnect.h"
63#include "RCReader.h"
64#include "HTTPResponse.h"
65#include "HTTPCacheResponse.h"
66
67using namespace std;
68
69namespace libdap {
70
71// These global variables are not MT-Safe, but I'm leaving them as is because
72// they are used only for debugging (set them in a debugger like gdb or ddd).
73// They are not static because I think that many debuggers cannot access
74// static variables. 08/07/02 jhrg
75
76// Set this to 1 to turn on libcurl's verbose mode (for debugging).
77int www_trace = 0;
78
79// Set this to 1 to turn on libcurl's VERY verbose mode.
80int www_trace_extensive = 0;
81
82// Keep the temporary files; useful for debugging.
83int dods_keep_temps = 0;
84
// Range of 4xx status codes that have an entry in http_client_errors[].
// The table is indexed by (status - CLIENT_ERR_MIN).
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] = {
    "Bad Request:",                                         // 400
    "Unauthorized: Contact the server administrator.",      // 401
    "Payment Required.",                                    // 402
    "Forbidden: Contact the server administrator.",         // 403
    "Not Found: The data source or server could not be found.\n"
    " Often this means that the OPeNDAP server is missing or needs attention.\n"
    " Please contact the server administrator.",            // 404
    "Method Not Allowed.",                                  // 405
    "Not Acceptable.",                                      // 406
    "Proxy Authentication Required.",                       // 407
    "Request Time-out.",                                    // 408
    "Conflict.",                                            // 409
    "Gone:.",                                               // 410
    "Length Required.",                                     // 411
    "Precondition Failed.",                                 // 412
    "Request Entity Too Large.",                            // 413
    "Request URI Too Large.",                               // 414
    "Unsupported Media Type.",                              // 415
    "Requested Range Not Satisfiable.",                     // 416
    "Expectation Failed."                                   // 417
};

// Range of 5xx status codes that have an entry in http_server_errors[].
// The table is indexed by (status - SERVER_ERR_MIN).
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = {
    "Internal Server Error.",                               // 500
    "Not Implemented.",                                     // 501
    "Bad Gateway.",                                         // 502
    "Service Unavailable.",                                 // 503
    "Gateway Time-out.",                                    // 504
    "HTTP Version Not Supported."                           // 505
};

/**
 * Translate an HTTP status code into a human readable message.
 *
 * @param status The HTTP status code.
 * @return The table entry for codes in 400..417 and 500..505; a generic
 * "Unknown Error" message for every other value.
 */
static std::string
http_status_to_string(int status)
{
    const bool client_error = status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX;
    if (client_error)
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);

    const bool server_error = status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX;
    if (server_error)
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);

    return std::string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
135
136static ObjectType
137determine_object_type(const string &header_value)
138{
139 // DAP4 Data: application/vnd.opendap.dap4.data
140 // DAP4 DMR: application/vnd.opendap.dap4.dataset-metadata+xml
141
142 string::size_type plus = header_value.find('+');
143 string base_type;
144 string type_extension = "";
145 if (plus != string::npos) {
146 base_type= header_value.substr(0, plus);
147 type_extension = header_value.substr(plus+1);
148 }
149 else
150 base_type = header_value;
151
152 if (base_type == DMR_Content_Type
153 || (base_type.find("application/") != string::npos
154 && base_type.find("dap4.dataset-metadata") != string::npos)) {
155 if (type_extension == "xml")
156 return dap4_dmr;
157 else
158 return unknown_type;
159 }
160 else if (base_type == DAP4_DATA_Content_Type
161 || (base_type.find("application/") != string::npos
162 && base_type.find("dap4.data") != string::npos)) {
163 return dap4_data;
164 }
165 else if (header_value.find("text/html") != string::npos) {
166 return web_error;
167 }
168 else
169 return unknown_type;
170}
171
176class ParseHeader : public unary_function<const string &, void>
177{
178 ObjectType type; // What type of object is in the stream?
179 string server; // Server's version string.
180 string protocol; // Server's protocol version.
181 string location; // Url returned by server
182
183public:
184 ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
185 { }
186
187 void operator()(const string &line)
188 {
189 string name, value;
190 parse_mime_header(line, name, value);
191
192 DBG2(cerr << name << ": " << value << endl);
193
194 // Content-Type is used to determine the content of DAP4 responses, but allow the
195 // Content-Description header to override CT o preserve operation with DAP2 servers.
196 // jhrg 11/12/13
197 if (type == unknown_type && name == "content-type") {
198 type = determine_object_type(value); // see above
199 }
200 if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
201 type = get_description_type(value); // defined in mime_util.cc
202 }
203 // The second test (== "dods/0.0") tests if xopendap-server has already
204 // been seen. If so, use that header in preference to the old
205 // XDODS-Server header. jhrg 2/7/06
206 else if (name == "xdods-server" && server == "dods/0.0") {
207 server = value;
208 }
209 else if (name == "xopendap-server") {
210 server = value;
211 }
212 else if (name == "xdap") {
213 protocol = value;
214 }
215 else if (server == "dods/0.0" && name == "server") {
216 server = value;
217 }
218 else if (name == "location") {
219 location = value;
220 }
221 }
222
223 ObjectType get_object_type()
224 {
225 return type;
226 }
227
228 string get_server()
229 {
230 return server;
231 }
232
233 string get_protocol()
234 {
235 return protocol;
236 }
237
238 string get_location() {
239 return location;
240 }
241};
242
/**
 * libcurl header callback: store one raw response header line.
 *
 * The line is stored without its trailing "\n" or "\r\n". Empty lines and
 * HTTP status lines (lines that begin with "HTTP/") are not stored.
 *
 * Fixes relative to the previous version:
 *  - a zero-length callback no longer underflows the length computation;
 *  - the final byte is removed only when it really is a line terminator;
 *  - only lines that *start* with "HTTP/" are treated as status lines, so a
 *    header such as "Server: Apache HTTP Server" is no longer discarded.
 *
 * @param ptr Pointer to the header bytes (not NUL terminated).
 * @param size Size of one element.
 * @param nmemb Number of elements; the line is size * nmemb bytes long.
 * @param resp_hdrs Pointer to the vector<string> that receives the line.
 * @return size * nmemb, i.e. the number of bytes consumed.
 */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(std::cerr << "Inside the header parser." << std::endl);

    const size_t total = size * nmemb;
    std::vector<std::string> *hdrs = static_cast<std::vector<std::string> *>(resp_hdrs);

    // Nothing to parse or nowhere to store it; report the data as consumed.
    if (!ptr || !hdrs || total == 0)
        return total;

    // Grab the header, minus the trailing newline. Or \r\n pair.
    const char *data = static_cast<const char *>(ptr);
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    const std::string complete_line(data, length);

    // Store all non-empty headers that are not HTTP status lines
    if (!complete_line.empty() && complete_line.compare(0, 5, "HTTP/") != 0) {
        DBG(std::cerr << "Header line: " << complete_line << std::endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
279
281static int
282curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
283{
284 string message(msg, size);
285
286 switch (info) {
287 case CURLINFO_TEXT:
288 cerr << "Text: " << message; break;
289 case CURLINFO_HEADER_IN:
290 cerr << "Header in: " << message; break;
291 case CURLINFO_HEADER_OUT:
292 cerr << "Header out: " << message; break;
293 case CURLINFO_DATA_IN:
294 if (www_trace_extensive)
295 cerr << "Data in: " << message; break;
296 case CURLINFO_DATA_OUT:
297 if (www_trace_extensive)
298 cerr << "Data out: " << message; break;
299 case CURLINFO_END:
300 cerr << "End: " << message; break;
301#ifdef CURLINFO_SSL_DATA_IN
302 case CURLINFO_SSL_DATA_IN:
303 cerr << "SSL Data in: " << message; break;
304#endif
305#ifdef CURLINFO_SSL_DATA_OUT
306 case CURLINFO_SSL_DATA_OUT:
307 cerr << "SSL Data out: " << message; break;
308#endif
309 default:
310 if (www_trace_extensive)
311 cerr << "Curl info: " << message; break;
312 }
313 return 0;
314}
315
319void
320HTTPConnect::www_lib_init()
321{
322 curl_global_init(CURL_GLOBAL_DEFAULT);
323
324 d_curl = curl_easy_init();
325 if (!d_curl)
326 throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
327
328 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
329
330 curl_easy_setopt(d_curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); // enables TLSv1.2 / TLSv1.3 version only
331
332 // Now set options that will remain constant for the duration of this
333 // CURL object.
334
335 // Set the proxy host.
336 if (!d_rcr->get_proxy_server_host().empty()) {
337 DBG(cerr << "Setting up a proxy server." << endl);
338 DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
339 << endl);
340 DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
341 << endl);
342 DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
343 << endl);
344 curl_easy_setopt(d_curl, CURLOPT_PROXY,
345 d_rcr->get_proxy_server_host().c_str());
346 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
347 d_rcr->get_proxy_server_port());
348
349 // As of 4/21/08 only NTLM, Digest and Basic work.
350#ifdef CURLOPT_PROXYAUTH
351 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
352#endif
353
354 // Password might not be required. 06/21/04 jhrg
355 if (!d_rcr->get_proxy_server_userpw().empty())
356 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
357 d_rcr->get_proxy_server_userpw().c_str());
358 }
359
360 // We have to set FailOnError to false for any of the non-Basic
361 // authentication schemes to work. 07/28/03 jhrg
362 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
363
364 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
365 // choosing the the 'safest' one supported by the server.
366 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
367 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
368
369 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
370 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
371 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
372 // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
373 // param of save_raw_http_headers to a vector<string> object.
374
375 // Follow 302 (redirect) responses
376 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
377 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
378
379 // If the user turns off SSL validation...
380 if (d_rcr->get_validate_ssl() == 0) {
381 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
382 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
383 }
384
385 // Set libcurl to use netrc to access data behind URS auth.
386 // libcurl will use the provided pathname for the ~/.netrc info. 08/23/19 kln
387 curl_easy_setopt(d_curl, CURLOPT_NETRC, 1);
388
389 // Look to see if cookies are turned on in the .dodsrc file. If so,
390 // activate here. We honor 'session cookies' (cookies without an
391 // expiration date) here so that session-based SSO systems will work as
392 // expected.
393 if (!d_cookie_jar.empty()) {
394 DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
395 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
396 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
397 }
398
399 if (www_trace) {
400 cerr << "Curl version: " << curl_version() << endl;
401 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
402 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
403 }
404}
405
409class BuildHeaders : public unary_function<const string &, void>
410{
411 struct curl_slist *d_cl;
412
413public:
414 BuildHeaders() : d_cl(0)
415 {}
416
417 void operator()(const string &header)
418 {
419 DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
420 << endl);
421 d_cl = curl_slist_append(d_cl, header.c_str());
422 }
423
424 struct curl_slist *get_headers()
425 {
426 return d_cl;
427 }
428};
429
444long
445HTTPConnect::read_url(const string &url, FILE *stream, vector<string> *resp_hdrs, const vector<string> *headers)
446{
447 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
448
449#ifdef WIN32
450 // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
451 // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
452 // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
453 // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
454 // this issue is that one should not pass a FILE * to a windows DLL. Close
455 // inspection of libcurl yields that their default write function when using
456 // the CURLOPT_WRITEDATA is just "fwrite".
457 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
458 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
459#else
460 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
461#endif
462
463 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
464 ostream_iterator<string>(cerr, "\n")));
465
466 BuildHeaders req_hdrs;
467 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
468 req_hdrs);
469 if (headers)
470 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
471
472 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
473
474 // Turn off the proxy for this URL?
475 bool temporary_proxy = false;
476 if ((temporary_proxy = url_uses_no_proxy_for(url))) {
477 DBG(cerr << "Suppress proxy for url: " << url << endl);
478 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
479 }
480
481 string::size_type at_sign = url.find('@');
482 // Assume username:password present *and* assume it's an HTTP URL; it *is*
483 // HTTPConnect, after all. 7 is position after "http://"; the second arg
484 // to substr() is the sub string length.
485 if (at_sign != url.npos)
486 d_upstring = url.substr(7, at_sign - 7);
487
488 if (!d_upstring.empty())
489 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
490
491 // Pass save_raw_http_headers() a pointer to the vector<string> where the
492 // response headers may be stored. Callers can use the resp_hdrs
493 // value/result parameter to get the raw response header information .
494 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
495
496 // This is the call that causes curl to go and get the remote resource and "write it down"
497 // utilizing the configuration state that has been previously conditioned by various perturbations
498 // of calls to curl_easy_setopt().
499 CURLcode res = curl_easy_perform(d_curl);
500
501 // Free the header list and null the value in d_curl.
502 curl_slist_free_all(req_hdrs.get_headers());
503 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
504
505 // Reset the proxy?
506 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
507 curl_easy_setopt(d_curl, CURLOPT_PROXY,
508 d_rcr->get_proxy_server_host().c_str());
509
510 if (res != 0)
511 throw Error(d_error_buffer);
512
513 long status;
514 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
515 if (res != 0)
516 throw Error(d_error_buffer);
517
518 char *ct_ptr = 0;
519 res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
520 if (res == CURLE_OK && ct_ptr)
521 d_content_type = ct_ptr;
522 else
523 d_content_type = "";
524
525 return status;
526}
527
531bool
532HTTPConnect::url_uses_proxy_for(const string &url)
533{
534 if (d_rcr->is_proxy_for_used()) {
535 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
536 int index = 0, matchlen;
537 return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
538 }
539
540 return false;
541}
542
546bool
547HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
548{
549 return d_rcr->is_no_proxy_for_used()
550 && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
551}
552
553// Public methods. Mostly...
554
561HTTPConnect::HTTPConnect(RCReader *rcr, bool use_cpp) : d_username(""), d_password(""), d_cookie_jar(""),
562 d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp)
563
564{
565 d_accept_deflate = rcr->get_deflate();
566 d_rcr = rcr;
567
568 // Load in the default headers to send with a request. The empty Pragma
569 // headers overrides libcurl's default Pragma: no-cache header (which
570 // will disable caching by Squid, et c.). The User-Agent header helps
571 // make server logs more readable. 05/05/03 jhrg
572 d_request_headers.push_back(string("Pragma:"));
573 string user_agent = string("User-Agent: ") + string(CNAME)
574 + string("/") + string(CVER);
575 d_request_headers.push_back(user_agent);
576 if (d_accept_deflate)
577 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
578
579 // HTTPCache::instance returns a valid ptr or 0.
580 if (d_rcr->get_use_cache())
581 d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),true);
582 else
583 d_http_cache = 0;
584
585 DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
586 << ")" << endl);
587
588 if (d_http_cache) {
589 d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
590 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
591 d_http_cache->set_max_size(d_rcr->get_max_cache_size());
592 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
593 d_http_cache->set_default_expiration(d_rcr->get_default_expires());
594 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
595 }
596
597 d_cookie_jar = rcr->get_cookie_jar();
598
599 www_lib_init(); // This may throw either Error or InternalErr
600}
601
602HTTPConnect::~HTTPConnect()
603{
604 DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
605
606 curl_easy_cleanup(d_curl);
607
608 DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
609}
610
/**
 * Predicate that is true for strings that begin with a given header name.
 *
 * The header text is stored by value. The previous version stored a
 * reference to the constructor argument, which dangles as soon as a
 * temporary string (e.g. HeaderMatch("Content-Type:")) is destroyed while
 * the predicate is still alive. The std::unary_function base (removed in
 * C++17) supplied only unused typedefs and was dropped.
 *
 * The comparison is case sensitive.
 */
class HeaderMatch {
    const std::string d_header;
public:
    HeaderMatch(const std::string &header) : d_header(header) {}

    /// @return true if 'arg' starts with the header text.
    bool operator()(const std::string &arg) const
    {
        // Prefix comparison; unlike find() == 0 this never scans past the
        // start of 'arg'.
        return arg.compare(0, d_header.size(), d_header) == 0;
    }
};
618
631HTTPResponse *
632HTTPConnect::fetch_url(const string &url)
633{
634#ifdef HTTP_TRACE
635 cout << "GET " << url << " HTTP/1.0" << endl;
636#endif
637
638 HTTPResponse *stream;
639
640 if (/*d_http_cache && d_http_cache->*/is_cache_enabled()) {
641 stream = caching_fetch_url(url);
642 }
643 else {
644 stream = plain_fetch_url(url);
645 }
646
647#ifdef HTTP_TRACE
648 stringstream ss;
649 ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
650 for (size_t i = 0; i < stream->get_headers()->size(); i++) {
651 ss << stream->get_headers()->at(i) << endl;
652 }
653 cout << ss.str();
654#endif
655
656 ParseHeader parser;
657
658 // An apparent quirk of libcurl is that it does not pass the Content-type
659 // header to the callback used to save them, but check and add it from the
660 // saved state variable only if it's not there (without this a test failed
661 // in HTTPCacheTest). jhrg 11/12/13
662 if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
663 HeaderMatch("Content-Type:")) == stream->get_headers()->end())
664 stream->get_headers()->push_back("Content-Type: " + d_content_type);
665
666 parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
667
668#ifdef HTTP_TRACE
669 cout << endl << endl;
670#endif
671
672 // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
673 if (parser.get_location() != "" &&
674 url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
675 delete stream;
676 return fetch_url(parser.get_location());
677 }
678
679 stream->set_type(parser.get_object_type()); // uses the value of content-description
680
681 stream->set_version(parser.get_server());
682 stream->set_protocol(parser.get_protocol());
683
684 if (d_use_cpp_streams) {
685 stream->transform_to_cpp();
686 }
687
688 return stream;
689}
690
691// Look around for a reasonable place to put a temporary file. Check first
692// the value of the TMPDIR env var. If that does not yeild a path that's
693// writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
694// defined in stdio.h. If both come up empty, then use `./'.
695
696// Change this to a version that either returns a string or an open file
697// descriptor. Use information from https://buildsecurityin.us-cert.gov/
698// (see open()) to make it more secure. Ideal solution: get deserialize()
699// methods to read from a stream returned by libcurl, not from a temporary
700// file. 9/21/07 jhrg Updated to use strings, other misc changes. 3/22/11
701static string
702get_tempfile_template(const string &file_template)
703{
704 string c;
705
706 // Windows has one idea of the standard name(s) for a temporary files dir
707#ifdef WIN32
708 // white list for a WIN32 directory
709 Regex directory("[-a-zA-Z0-9_:\\]*");
710
711 // If we're OK to use getenv(), try it.
712#ifdef USE_GETENV
713 c = getenv("TEMP");
714 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
715 goto valid_temp_directory;
716
717 c= getenv("TMP");
718 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
719 goto valid_temp_directory;
720#endif // USE_GETENV
721
722 // The windows default
723 c = "c:\tmp";
724 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
725 goto valid_temp_directory;
726
727#else // Unix/Linux/OSX has another...
728 // white list for a directory
729 Regex directory("[-a-zA-Z0-9_/]*");
730#ifdef USE_GETENV
731 c = getenv("TMPDIR");
732 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
733 goto valid_temp_directory;
734#endif // USE_GETENV
735
736 // Unix defines this sometimes - if present, use it.
737#ifdef P_tmpdir
738 if (access(P_tmpdir, W_OK | R_OK) == 0) {
739 c = P_tmpdir;
740 goto valid_temp_directory;
741 }
742#endif
743
744 // The Unix default
745 c = "/tmp";
746 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
747 goto valid_temp_directory;
748
749#endif // WIN32
750
751 // If we found nothing useful, use the current directory
752 c = ".";
753
754valid_temp_directory:
755
756#ifdef WIN32
757 c += "\\" + file_template;
758#else
759 c += "/" + file_template;
760#endif
761
762 return c;
763}
764
783string
784get_temp_file(FILE *&stream) throw(Error)
785{
786 string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
787
788 vector<char> pathname(dods_temp.length() + 1);
789
790 strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
791
792 DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
793
794 // Open truncated for update. NB: mkstemp() returns a file descriptor.
795#if defined(WIN32) || defined(TEST_WIN32_TEMPS)
796 stream = fopen(_mktemp(&pathname[0]), "w+b");
797#else
798 // Make sure that temp files are accessible only by the owner.
799 int mask = umask(077);
800 if (mask < 0)
801 throw Error("Could not set the file creation mask: " + string(strerror(errno)));
802 int fd = mkstemp(&pathname[0]);
803 if (fd < 0)
804 throw Error("Could not create a temporary file to store the response: " + string(strerror(errno)));
805
806 stream = fdopen(fd, "w+");
807 umask(mask);
808#endif
809
810 if (!stream)
811 throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")");
812
813 dods_temp = &pathname[0];
814 return dods_temp;
815}
816
817
823void
824close_temp(FILE *s, const string &name)
825{
826 int res = fclose(s);
827 if (res)
828 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
829
830 res = unlink(name.c_str());
831 if (res != 0)
832 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
833}
834
856HTTPResponse *
857HTTPConnect::caching_fetch_url(const string &url)
858{
859 DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
860
861 vector<string> *headers = new vector<string>;
862 string file_name;
863 FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
864 if (!s) {
865 // url not in cache; get it and cache it
866 DBGN(cerr << "no; getting response and caching." << endl);
867 delete headers; headers = 0;
868 time_t now = time(0);
869 HTTPResponse *rs = plain_fetch_url(url);
870 d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
871
872 return rs;
873 }
874 else { // url in cache
875 DBGN(cerr << "yes... ");
876
877 if (d_http_cache->is_url_valid(url)) { // url in cache and valid
878 DBGN(cerr << "and it's valid; using cached response." << endl);
879 HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
880 return crs;
881 }
882 else { // url in cache but not valid; validate
883 DBGN(cerr << "but it's not valid; validating... ");
884
885 d_http_cache->release_cached_response(s); // This closes 's'
886 headers->clear();
887 vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
888 FILE *body = 0;
889 string dods_temp = get_temp_file(body);
890 time_t now = time(0); // When was the request made (now).
891 long http_status;
892
893 try {
894 http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
895 rewind(body);
896 }
897 catch (Error &e) {
898 close_temp(body, dods_temp);
899 delete headers;
900 throw ;
901 }
902
903 switch (http_status) {
904 case 200: { // New headers and new body
905 DBGN(cerr << "read a new response; caching." << endl);
906
907 d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
908 HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
909
910 return rs;
911 }
912
913 case 304: { // Just new headers, use cached body
914 DBGN(cerr << "cached response valid; updating." << endl);
915
916 close_temp(body, dods_temp);
917 d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
918 string file_name;
919 FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
920 HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
921 return crs;
922 }
923
924 default: { // Oops.
925 close_temp(body, dods_temp);
926 if (http_status >= 400) {
927 delete headers; headers = 0;
928 string msg = "Error while reading the URL: ";
929 msg += url;
930 msg
931 += ".\nThe OPeNDAP server returned the following message:\n";
932 msg += http_status_to_string(http_status);
933 throw Error(msg);
934 }
935 else {
936 delete headers; headers = 0;
937 throw InternalErr(__FILE__, __LINE__,
938 "Bad response from the HTTP server: " + long_to_string(http_status));
939 }
940 }
941 }
942 }
943 }
944
945 throw InternalErr(__FILE__, __LINE__, "Should never get here");
946}
947
959HTTPResponse *
960HTTPConnect::plain_fetch_url(const string &url)
961{
962 DBG(cerr << "Getting URL: " << url << endl);
963 FILE *stream = 0;
964 string dods_temp = get_temp_file(stream);
965 vector<string> *resp_hdrs = new vector<string>;
966
967 int status = -1;
968 try {
969 status = read_url(url, stream, resp_hdrs); // Throws Error.
970 if (status >= 400) {
971 // delete resp_hdrs; resp_hdrs = 0;
972 string msg = "Error while reading the URL: ";
973 msg += url;
974 msg += ".\nThe OPeNDAP server returned the following message:\n";
975 msg += http_status_to_string(status);
976 throw Error(msg);
977 }
978 }
979
980 catch (Error &e) {
981 delete resp_hdrs;
982 close_temp(stream, dods_temp);
983 throw;
984 }
985
986#if 0
987 if (d_use_cpp_streams) {
988 fclose(stream);
989 fstream *in = new fstream(dods_temp.c_str(), ios::in|ios::binary);
990 return new HTTPResponse(in, status, resp_hdrs, dods_temp);
991 }
992 else {
993#endif
994 rewind(stream);
995 return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
996#if 0
997}
998#endif
999}
1000
1012void
1014{
1015 d_accept_deflate = deflate;
1016
1017 if (d_accept_deflate) {
1018 if (find(d_request_headers.begin(), d_request_headers.end(),
1019 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
1020 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
1021 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1022 ostream_iterator<string>(cerr, "\n")));
1023 }
1024 else {
1025 vector<string>::iterator i;
1026 i = remove_if(d_request_headers.begin(), d_request_headers.end(),
1027 bind2nd(equal_to<string>(),
1028 string("Accept-Encoding: deflate, gzip, compress")));
1029 d_request_headers.erase(i, d_request_headers.end());
1030 }
1031}
1032
1041void
1043{
1044 // Look for, and remove if one exists, an XDAP-Accept header
1045 vector<string>::iterator i;
1046 i = find_if(d_request_headers.begin(), d_request_headers.end(),
1047 HeaderMatch("XDAP-Accept:"));
1048 if (i != d_request_headers.end())
1049 d_request_headers.erase(i);
1050
1051 // Record and add the new header value
1052 d_dap_client_protocol_major = major;
1053 d_dap_client_protocol_minor = minor;
1054 ostringstream xdap_accept;
1055 xdap_accept << "XDAP-Accept: " << major << "." << minor;
1056
1057 d_request_headers.push_back(xdap_accept.str());
1058
1059 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1060 ostream_iterator<string>(cerr, "\n")));
1061}
1062
1078void
1079HTTPConnect::set_credentials(const string &u, const string &p)
1080{
1081 if (u.empty())
1082 return;
1083
1084 // Store the credentials locally.
1085 d_username = u;
1086 d_password = p;
1087
1088 d_upstring = u + ":" + p;
1089}
1090
1091} // namespace libdap
A class for error processing.
Definition: Error.h:94
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
Definition: HTTPCache.cc:1156
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition: HTTPCache.cc:129
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:690
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:819
void release_cached_response(FILE *response)
Definition: HTTPCache.cc:1571
vector< string > get_conditional_request_headers(const string &url)
Definition: HTTPCache.cc:1249
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:635
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:772
bool is_url_valid(const string &url)
Definition: HTTPCache.cc:1388
void set_always_validate(bool validate)
Definition: HTTPCache.cc:841
void update_response(const string &url, time_t request_time, const vector< string > &headers)
Definition: HTTPCache.cc:1319
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:724
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
Definition: HTTPCache.cc:1480
void set_accept_deflate(bool defalte)
HTTPResponse * fetch_url(const string &url)
Definition: HTTPConnect.cc:632
void set_credentials(const string &u, const string &p)
void set_xdap_protocol(int major, int minor)
A class for software fault reporting.
Definition: InternalErr.h:65
string get_proxy_server_host() const
Get the proxy host.
Definition: RCReader.h:181
int get_proxy_server_port() const
Get the proxy port.
Definition: RCReader.h:186
string get_proxy_server_userpw() const
Get the proxy username and password.
Definition: RCReader.h:191
string get_proxy_for_regexp() const
Definition: RCReader.h:215
bool is_proxy_for_used()
Definition: RCReader.h:210
top level DAP object to house generic methods
Definition: AlarmHandler.h:36
ObjectType get_description_type(const string &value)
Definition: mime_util.cc:339
void parse_mime_header(const string &header, string &name, string &value)
Definition: mime_util.cc:912
string get_temp_file(FILE *&stream)
Definition: HTTPConnect.cc:784
void close_temp(FILE *s, const string &name)
Definition: HTTPConnect.cc:824
ObjectType
The type of object in the stream coming from the data server.
Definition: ObjectType.h:58