#include "config.h"
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/uio.h>
#include "nsd.h"
#include "xfrd-tcp.h"
#include "buffer.h"
#include "packet.h"
#include "dname.h"
#include "options.h"
#include "namedb.h"
#include "xfrd.h"
#include "xfrd-disk.h"
#include "util.h"
#ifdef HAVE_SSL
#include <openssl/ssl.h>
#include <openssl/err.h>
#include <openssl/x509.h>
#include <openssl/evp.h>
#endif
#ifdef HAVE_SSL
void log_crypto_err(const char* str);
void
get_cert_info(SSL* ssl, region_type* region, char** cert_serial,
char** key_id, char** cert_algorithm, char** tls_version)
{
X509* cert = NULL;
ASN1_INTEGER* serial = NULL;
EVP_PKEY* pkey = NULL;
unsigned char key_fingerprint[EVP_MAX_MD_SIZE];
unsigned int key_fingerprint_len = 0;
const EVP_MD* md = EVP_sha256();
const char* pkey_name = NULL;
const char* version_name = NULL;
int i;
char temp_buffer[1024];
*cert_serial = NULL;
*key_id = NULL;
*cert_algorithm = NULL;
*tls_version = NULL;
#ifdef HAVE_SSL_GET1_PEER_CERTIFICATE
cert = SSL_get1_peer_certificate(ssl);
#else
cert = SSL_get_peer_certificate(ssl);
#endif
if (!cert) {
return;
}
serial = X509_get_serialNumber(cert);
if (serial) {
BIGNUM* bn = ASN1_INTEGER_to_BN(serial, NULL);
if (bn) {
char* hex_serial = BN_bn2hex(bn);
if (hex_serial) {
snprintf(temp_buffer, sizeof(temp_buffer), "%s", hex_serial);
*cert_serial = region_strdup(region, temp_buffer);
OPENSSL_free(hex_serial);
}
BN_free(bn);
}
}
if (X509_pubkey_digest(cert, md, key_fingerprint, &key_fingerprint_len) == 1 && key_fingerprint_len >= 8) {
size_t id_len = 8;
char key_id_buffer[17];
for (i = 0; i < (int)id_len; i++) {
snprintf(key_id_buffer + (i * 2), sizeof(key_id_buffer) - (i * 2), "%02x", key_fingerprint[i]);
}
*key_id = region_strdup(region, key_id_buffer);
}
pkey = X509_get_pubkey(cert);
if (pkey) {
#ifdef HAVE_EVP_PKEY_GET0_TYPE_NAME
pkey_name = EVP_PKEY_get0_type_name(pkey);
#else
pkey_name = OBJ_nid2sn(EVP_PKEY_type(EVP_PKEY_id(pkey)));
#endif
if (pkey_name) {
*cert_algorithm = region_strdup(region, pkey_name);
} else {
int pkey_type = EVP_PKEY_id(pkey);
char algo_buffer[32];
snprintf(algo_buffer, sizeof(algo_buffer), "Unknown(%d)", pkey_type);
*cert_algorithm = region_strdup(region, algo_buffer);
}
EVP_PKEY_free(pkey);
}
version_name = SSL_get_version(ssl);
if (version_name) {
*tls_version = region_strdup(region, version_name);
} else {
int version = SSL_version(ssl);
char version_buffer[16];
snprintf(version_buffer, sizeof(version_buffer), "Unknown(%d)", version);
*tls_version = region_strdup(region, version_buffer);
}
X509_free(cert);
}
static SSL_CTX*
create_ssl_context()
{
SSL_CTX *ctx;
unsigned char protos[] = { 3, 'd', 'o', 't' };
ctx = SSL_CTX_new(TLS_client_method());
if (!ctx) {
log_msg(LOG_ERR, "xfrd tls: Unable to create SSL ctxt");
}
else if (SSL_CTX_set_default_verify_paths(ctx) != 1) {
SSL_CTX_free(ctx);
log_msg(LOG_ERR, "xfrd tls: Unable to set default SSL verify paths");
return NULL;
}
else if (!SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION)) {
SSL_CTX_free(ctx);
log_msg(LOG_ERR, "xfrd tls: Unable to set minimum TLS version 1.3");
return NULL;
}
if (SSL_CTX_set_alpn_protos(ctx, protos, sizeof(protos)) != 0) {
SSL_CTX_free(ctx);
log_msg(LOG_ERR, "xfrd tls: Unable to set ALPN protocols");
return NULL;
}
return ctx;
}
static int
tls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx)
{
int err = X509_STORE_CTX_get_error(ctx);
int depth = X509_STORE_CTX_get_error_depth(ctx);
if (!preverify_ok)
log_msg(LOG_ERR, "xfrd tls: TLS verify failed - (%d) depth: %d error: %s",
err,
depth,
X509_verify_cert_error_string(err));
return preverify_ok;
}
static int
setup_ssl(struct xfrd_tcp_pipeline* tp, struct xfrd_tcp_set* tcp_set,
const char* auth_domain_name)
{
if (!tcp_set->ssl_ctx) {
log_msg(LOG_ERR, "xfrd tls: No TLS CTX, cannot set up XFR-over-TLS");
return 0;
}
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: setting up TLS for tls_auth domain name %s",
auth_domain_name));
tp->ssl = SSL_new((SSL_CTX*)tcp_set->ssl_ctx);
if(!tp->ssl) {
log_msg(LOG_ERR, "xfrd tls: Unable to create TLS object");
return 0;
}
SSL_set_connect_state(tp->ssl);
(void)SSL_set_mode(tp->ssl, SSL_MODE_AUTO_RETRY);
if(!SSL_set_fd(tp->ssl, tp->tcp_w->fd)) {
log_msg(LOG_ERR, "xfrd tls: Unable to set TLS fd");
SSL_free(tp->ssl);
tp->ssl = NULL;
return 0;
}
SSL_set_verify(tp->ssl, SSL_VERIFY_PEER, tls_verify_callback);
if(!SSL_set1_host(tp->ssl, auth_domain_name)) {
log_msg(LOG_ERR, "xfrd tls: TLS setting of hostname %s failed",
auth_domain_name);
SSL_free(tp->ssl);
tp->ssl = NULL;
return 0;
}
return 1;
}
static int
ssl_handshake(struct xfrd_tcp_pipeline* tp)
{
int ret;
ERR_clear_error();
ret = SSL_do_handshake(tp->ssl);
if(ret == 1) {
DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: TLS handshake successful"));
tp->handshake_done = 1;
return 1;
}
tp->handshake_want = SSL_get_error(tp->ssl, ret);
if(tp->handshake_want == SSL_ERROR_WANT_READ
|| tp->handshake_want == SSL_ERROR_WANT_WRITE)
return 1;
return 0;
}
int password_cb(char *buf, int size, int ATTR_UNUSED(rwflag), void *u)
{
strlcpy(buf, (char*)u, size);
return strlen(buf);
}
#endif
static int
xfrd_pipe_cmp(const void* a, const void* b)
{
const struct xfrd_tcp_pipeline* x = (struct xfrd_tcp_pipeline*)a;
const struct xfrd_tcp_pipeline* y = (struct xfrd_tcp_pipeline*)b;
int r;
if(x == y)
return 0;
if(y->key.ip_len != x->key.ip_len)
return (int)y->key.ip_len - (int)x->key.ip_len;
r = memcmp(&x->key.ip, &y->key.ip, x->key.ip_len);
if(r != 0)
return r;
if(x->key.num_unused != y->key.num_unused) {
return (x->key.num_unused < y->key.num_unused) ? -1 : 1;
}
return (uintptr_t)x < (uintptr_t)y ? -1 : 1;
}
struct xfrd_tcp_set* xfrd_tcp_set_create(struct region* region, const char *tls_cert_bundle, int tcp_max, int tcp_pipeline)
{
int i;
struct xfrd_tcp_set* tcp_set = region_alloc(region,
sizeof(struct xfrd_tcp_set));
memset(tcp_set, 0, sizeof(struct xfrd_tcp_set));
tcp_set->tcp_state = NULL;
tcp_set->tcp_max = tcp_max;
tcp_set->tcp_pipeline = tcp_pipeline;
tcp_set->tcp_count = 0;
tcp_set->tcp_waiting_first = 0;
tcp_set->tcp_waiting_last = 0;
#ifdef HAVE_TLS_1_3
tcp_set->ssl_ctx = create_ssl_context();
if (tcp_set->ssl_ctx == NULL)
log_msg(LOG_ERR, "xfrd: XFR-over-TLS not available");
else if (tls_cert_bundle && tls_cert_bundle[0] && SSL_CTX_load_verify_locations(
tcp_set->ssl_ctx, tls_cert_bundle, NULL) != 1) {
log_msg(LOG_ERR, "xfrd tls: Unable to set the certificate bundle file %s",
tls_cert_bundle);
}
#else
(void)tls_cert_bundle;
log_msg(LOG_INFO, "xfrd: No TLS 1.3 support - XFR-over-TLS not available");
#endif
tcp_set->tcp_state = region_alloc(region,
sizeof(*tcp_set->tcp_state)*tcp_set->tcp_max);
for(i=0; i<tcp_set->tcp_max; i++)
tcp_set->tcp_state[i] = xfrd_tcp_pipeline_create(region,
tcp_pipeline);
tcp_set->pipetree = rbtree_create(region, &xfrd_pipe_cmp);
return tcp_set;
}
static int pipeline_id_compare(const void* x, const void* y)
{
struct xfrd_tcp_pipeline_id* a = (struct xfrd_tcp_pipeline_id*)x;
struct xfrd_tcp_pipeline_id* b = (struct xfrd_tcp_pipeline_id*)y;
if(a->id < b->id)
return -1;
if(a->id > b->id)
return 1;
return 0;
}
void pick_id_values(uint16_t* array, int num, int max)
{
uint8_t inserted[65536];
int j, done;
if(num == 65536) {
int i;
for(i=0; i<num; i++)
array[i] = (uint16_t)i;
return;
}
assert(max <= 65536);
memset(inserted, 0, sizeof(inserted[0])*max);
done=0;
for(j = max-num; j<max; j++) {
int t;
if(j+1 <= 1)
t = 0;
else t = random_generate(j+1);
if(!inserted[t]) {
array[done++]=t;
inserted[t] = 1;
} else {
array[done++]=j;
inserted[j] = 1;
}
}
}
static void
clear_pipeline_entry(struct xfrd_tcp_pipeline* tp, rbnode_type* node)
{
struct xfrd_tcp_pipeline_id *n;
if(node == NULL || node == RBTREE_NULL)
return;
clear_pipeline_entry(tp, node->left);
node->left = NULL;
clear_pipeline_entry(tp, node->right);
node->right = NULL;
n = (struct xfrd_tcp_pipeline_id*)node;
n->next_free = tp->pipe_id_free_list;
tp->pipe_id_free_list = n;
}
static void
xfrd_tcp_pipeline_cleanup(struct xfrd_tcp_pipeline* tp)
{
clear_pipeline_entry(tp, tp->zone_per_id->root);
tp->zone_per_id->count = 0;
tp->zone_per_id->root = RBTREE_NULL;
}
static void
xfrd_tcp_pipeline_init(struct xfrd_tcp_pipeline* tp)
{
tp->key.node.key = tp;
tp->key.num_unused = tp->pipe_num;
tp->key.num_skip = 0;
tp->tcp_send_first = NULL;
tp->tcp_send_last = NULL;
xfrd_tcp_pipeline_cleanup(tp);
pick_id_values(tp->unused, tp->pipe_num, 65536);
}
struct xfrd_tcp_pipeline*
xfrd_tcp_pipeline_create(region_type* region, int tcp_pipeline)
{
int i;
struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)
region_alloc_zero(region, sizeof(*tp));
if(tcp_pipeline < 0)
tcp_pipeline = 0;
if(tcp_pipeline > 65536)
tcp_pipeline = 65536;
tp->pipe_num = tcp_pipeline;
tp->key.num_unused = tp->pipe_num;
tp->zone_per_id = rbtree_create(region, &pipeline_id_compare);
tp->pipe_id_free_list = NULL;
for(i=0; i<tp->pipe_num; i++) {
struct xfrd_tcp_pipeline_id* n = (struct xfrd_tcp_pipeline_id*)
region_alloc_zero(region, sizeof(*n));
n->next_free = tp->pipe_id_free_list;
tp->pipe_id_free_list = n;
}
tp->unused = (uint16_t*)region_alloc_zero(region,
sizeof(tp->unused[0])*tp->pipe_num);
tp->tcp_r = xfrd_tcp_create(region, QIOBUFSZ);
tp->tcp_w = xfrd_tcp_create(region, QIOBUFSZ);
xfrd_tcp_pipeline_init(tp);
return tp;
}
static struct xfrd_zone*
xfrd_tcp_pipeline_lookup_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
{
struct xfrd_tcp_pipeline_id key;
rbnode_type* n;
memset(&key, 0, sizeof(key));
key.node.key = &key;
key.id = id;
n = rbtree_search(tp->zone_per_id, &key);
if(n && n != RBTREE_NULL) {
return ((struct xfrd_tcp_pipeline_id*)n)->zone;
}
return NULL;
}
static void
xfrd_tcp_pipeline_insert_id(struct xfrd_tcp_pipeline* tp, uint16_t id,
struct xfrd_zone* zone)
{
struct xfrd_tcp_pipeline_id* n;
assert(tp->pipe_id_free_list != NULL);
n = tp->pipe_id_free_list;
tp->pipe_id_free_list = n->next_free;
n->next_free = NULL;
memset(&n->node, 0, sizeof(n->node));
n->node.key = n;
n->id = id;
n->zone = zone;
rbtree_insert(tp->zone_per_id, &n->node);
}
static void
xfrd_tcp_pipeline_remove_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
{
struct xfrd_tcp_pipeline_id key;
rbnode_type* node;
memset(&key, 0, sizeof(key));
key.node.key = &key;
key.id = id;
node = rbtree_delete(tp->zone_per_id, &key);
if(node && node != RBTREE_NULL) {
struct xfrd_tcp_pipeline_id* n =
(struct xfrd_tcp_pipeline_id*)node;
n->next_free = tp->pipe_id_free_list;
tp->pipe_id_free_list = n;
}
}
static void
xfrd_tcp_pipeline_skip_id(struct xfrd_tcp_pipeline* tp, uint16_t id)
{
struct xfrd_tcp_pipeline_id key;
rbnode_type* n;
memset(&key, 0, sizeof(key));
key.node.key = &key;
key.id = id;
n = rbtree_search(tp->zone_per_id, &key);
if(n && n != RBTREE_NULL) {
struct xfrd_tcp_pipeline_id* zid = (struct xfrd_tcp_pipeline_id*)n;
zid->zone = TCP_NULL_SKIP;
}
}
void
xfrd_setup_packet(buffer_type* packet,
uint16_t type, uint16_t klass, const dname_type* dname, uint16_t qid,
int* apex_compress)
{
buffer_clear(packet);
ID_SET(packet, qid);
FLAGS_SET(packet, 0);
OPCODE_SET(packet, OPCODE_QUERY);
QDCOUNT_SET(packet, 1);
ANCOUNT_SET(packet, 0);
NSCOUNT_SET(packet, 0);
ARCOUNT_SET(packet, 0);
buffer_skip(packet, QHEADERSZ);
if(apex_compress)
*apex_compress = buffer_position(packet);
buffer_write(packet, dname_name(dname), dname->name_size);
buffer_write_u16(packet, type);
buffer_write_u16(packet, klass);
}
static socklen_t
#ifdef INET6
xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
struct sockaddr_storage *sck)
#else
xfrd_acl_sockaddr(acl_options_type* acl, unsigned int port,
struct sockaddr_in *sck, const char* fromto)
#endif
{
#ifdef INET6
memset(sck, 0, sizeof(struct sockaddr_storage));
#else
memset(sck, 0, sizeof(struct sockaddr_in));
#endif
if(acl->is_ipv6) {
#ifdef INET6
struct sockaddr_in6* sa = (struct sockaddr_in6*)sck;
sa->sin6_family = AF_INET6;
sa->sin6_port = htons(port);
sa->sin6_addr = acl->addr.addr6;
return sizeof(struct sockaddr_in6);
#else
log_msg(LOG_ERR, "xfrd: IPv6 connection %s %s attempted but no \
INET6.", fromto, acl->ip_address_spec);
return 0;
#endif
} else {
struct sockaddr_in* sa = (struct sockaddr_in*)sck;
sa->sin_family = AF_INET;
sa->sin_port = htons(port);
sa->sin_addr = acl->addr.addr;
return sizeof(struct sockaddr_in);
}
}
socklen_t
#ifdef INET6
xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_storage *to)
#else
xfrd_acl_sockaddr_to(acl_options_type* acl, struct sockaddr_in *to)
#endif
{
#ifdef HAVE_TLS_1_3
unsigned int port = acl->port?acl->port:(acl->tls_auth_options?
(unsigned)atoi(TLS_PORT):(unsigned)atoi(TCP_PORT));
#else
unsigned int port = acl->port?acl->port:(unsigned)atoi(TCP_PORT);
#endif
#ifdef INET6
return xfrd_acl_sockaddr(acl, port, to);
#else
return xfrd_acl_sockaddr(acl, port, to, "to");
#endif
}
socklen_t
#ifdef INET6
xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_storage *frm)
#else
xfrd_acl_sockaddr_frm(acl_options_type* acl, struct sockaddr_in *frm)
#endif
{
unsigned int port = acl->port?acl->port:0;
#ifdef INET6
return xfrd_acl_sockaddr(acl, port, frm);
#else
return xfrd_acl_sockaddr(acl, port, frm, "from");
#endif
}
void
xfrd_write_soa_buffer(struct buffer* packet,
const dname_type* apex, struct xfrd_soa* soa, int apex_compress)
{
size_t rdlength_pos;
uint16_t rdlength;
if(apex_compress > 0 && apex_compress < (int)buffer_limit(packet) &&
apex->name_size > 1)
buffer_write_u16(packet, 0xc000 | apex_compress);
else buffer_write(packet, dname_name(apex), apex->name_size);
buffer_write(packet, &soa->type, sizeof(soa->type));
buffer_write(packet, &soa->klass, sizeof(soa->klass));
buffer_write(packet, &soa->ttl, sizeof(soa->ttl));
rdlength_pos = buffer_position(packet);
buffer_skip(packet, sizeof(rdlength));
if(apex_compress > 0 && apex_compress < (int)buffer_limit(packet) &&
apex->name_size > 1 && is_dname_subdomain_of_case(
soa->prim_ns+1, soa->prim_ns[0], dname_name(apex),
apex->name_size)) {
if(soa->prim_ns[0] > apex->name_size)
buffer_write(packet, soa->prim_ns+1, soa->prim_ns[0]-
apex->name_size);
buffer_write_u16(packet, 0xc000 | apex_compress);
} else {
buffer_write(packet, soa->prim_ns+1, soa->prim_ns[0]);
}
if(apex_compress > 0 && apex_compress < (int)buffer_limit(packet) &&
apex->name_size > 1 && is_dname_subdomain_of_case(soa->email+1,
soa->email[0], dname_name(apex), apex->name_size)) {
if(soa->email[0] > apex->name_size)
buffer_write(packet, soa->email+1, soa->email[0]-
apex->name_size);
buffer_write_u16(packet, 0xc000 | apex_compress);
} else {
buffer_write(packet, soa->email+1, soa->email[0]);
}
buffer_write(packet, &soa->serial, sizeof(uint32_t));
buffer_write(packet, &soa->refresh, sizeof(uint32_t));
buffer_write(packet, &soa->retry, sizeof(uint32_t));
buffer_write(packet, &soa->expire, sizeof(uint32_t));
buffer_write(packet, &soa->minimum, sizeof(uint32_t));
rdlength = buffer_position(packet) - rdlength_pos - sizeof(rdlength);
buffer_write_u16_at(packet, rdlength_pos, rdlength);
}
struct xfrd_tcp*
xfrd_tcp_create(region_type* region, size_t bufsize)
{
struct xfrd_tcp* tcp_state = (struct xfrd_tcp*)region_alloc(
region, sizeof(struct xfrd_tcp));
memset(tcp_state, 0, sizeof(struct xfrd_tcp));
tcp_state->packet = buffer_create(region, bufsize);
tcp_state->fd = -1;
return tcp_state;
}
static struct xfrd_tcp_pipeline*
pipeline_find(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
{
rbnode_type* sme = NULL;
struct xfrd_tcp_pipeline* r;
struct xfrd_tcp_pipeline_key k, *key=&k;
key->node.key = key;
key->ip_len = xfrd_acl_sockaddr_to(zone->master, &key->ip);
key->num_unused = set->tcp_pipeline;
if(rbtree_find_less_equal(set->pipetree, key, &sme)) {
assert(0);
}
if(!sme)
return NULL;
r = (struct xfrd_tcp_pipeline*)sme->key;
if(r->key.ip_len != key->ip_len)
return NULL;
if(memcmp(&r->key.ip, &key->ip, key->ip_len) != 0)
return NULL;
if(r->key.num_unused == 0)
return NULL;
return r;
}
static void
tcp_zone_waiting_list_popfirst(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
{
assert(zone->tcp_waiting);
set->tcp_waiting_first = zone->tcp_waiting_next;
if(zone->tcp_waiting_next)
zone->tcp_waiting_next->tcp_waiting_prev = NULL;
else set->tcp_waiting_last = 0;
zone->tcp_waiting_next = 0;
zone->tcp_waiting = 0;
}
static void
tcp_pipe_sendlist_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
{
if(zone->in_tcp_send) {
if(zone->tcp_send_prev)
zone->tcp_send_prev->tcp_send_next=zone->tcp_send_next;
else tp->tcp_send_first=zone->tcp_send_next;
if(zone->tcp_send_next)
zone->tcp_send_next->tcp_send_prev=zone->tcp_send_prev;
else tp->tcp_send_last=zone->tcp_send_prev;
zone->in_tcp_send = 0;
}
}
static void
tcp_pipe_sendlist_popfirst(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
{
tp->tcp_send_first = zone->tcp_send_next;
if(tp->tcp_send_first)
tp->tcp_send_first->tcp_send_prev = NULL;
else tp->tcp_send_last = NULL;
zone->in_tcp_send = 0;
}
static void
tcp_pipe_id_remove(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone,
int alsotree)
{
assert(tp->key.num_unused < tp->pipe_num && tp->key.num_unused >= 0);
if(alsotree)
xfrd_tcp_pipeline_remove_id(tp, zone->query_id);
tp->unused[tp->key.num_unused] = zone->query_id;
(void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
tp->key.num_unused++;
(void)rbtree_insert(xfrd->tcp_set->pipetree, &tp->key.node);
}
static void
xfrd_tcp_pipe_stop(struct xfrd_tcp_pipeline* tp)
{
struct xfrd_tcp_pipeline_id* zid;
int conn = -1;
assert(tp->key.num_unused < tp->pipe_num);
assert(tp->pipe_num - tp->key.num_unused > tp->key.num_skip);
RBTREE_FOR(zid, struct xfrd_tcp_pipeline_id*, tp->zone_per_id) {
xfrd_zone_type* zone = zid->zone;
if(zone && zone != TCP_NULL_SKIP) {
assert(zone->query_id == zid->id);
conn = zone->tcp_conn;
zone->tcp_conn = -1;
zone->tcp_waiting = 0;
tcp_pipe_sendlist_remove(tp, zone);
tcp_pipe_id_remove(tp, zone, 0);
xfrd_set_refresh_now(zone);
}
}
xfrd_tcp_pipeline_cleanup(tp);
assert(conn != -1);
xfrd_tcp_pipe_release(xfrd->tcp_set, tp, conn);
}
static void
tcp_pipe_reset_timeout(struct xfrd_tcp_pipeline* tp)
{
int fd = tp->handler.ev_fd;
struct timeval tv;
tv.tv_sec = xfrd->tcp_set->tcp_timeout;
tv.tv_usec = 0;
if(tp->handler_added)
event_del(&tp->handler);
memset(&tp->handler, 0, sizeof(tp->handler));
event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
#ifdef HAVE_TLS_1_3
( tp->ssl
? ( tp->handshake_done ? ( tp->tcp_send_first ? EV_WRITE : 0 )
: tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0 )
: tp->tcp_send_first ? EV_WRITE : 0 ),
#else
( tp->tcp_send_first ? EV_WRITE : 0 ),
#endif
xfrd_handle_tcp_pipe, tp);
if(event_base_set(xfrd->event_base, &tp->handler) != 0)
log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
if(event_add(&tp->handler, &tv) != 0)
log_msg(LOG_ERR, "xfrd tcp: event_add failed");
tp->handler_added = 1;
}
void
xfrd_handle_tcp_pipe(int ATTR_UNUSED(fd), short event, void* arg)
{
struct xfrd_tcp_pipeline* tp = (struct xfrd_tcp_pipeline*)arg;
if((event & EV_WRITE)) {
tcp_pipe_reset_timeout(tp);
if(tp->tcp_send_first) {
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp write, zone %s",
tp->tcp_send_first->apex_str));
xfrd_tcp_write(tp, tp->tcp_send_first);
}
}
if((event & EV_READ) && tp->handler_added) {
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp read"));
tcp_pipe_reset_timeout(tp);
xfrd_tcp_read(tp);
}
if((event & EV_TIMEOUT) && tp->handler_added) {
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: event tcp timeout"));
xfrd_tcp_pipe_stop(tp);
}
}
static void
pipeline_setup_new_zone(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
xfrd_zone_type* zone)
{
int idx;
assert(tp->key.num_unused > 0);
idx = random_generate(tp->key.num_unused);
zone->query_id = tp->unused[idx];
tp->unused[idx] = tp->unused[tp->key.num_unused-1];
xfrd_tcp_pipeline_insert_id(tp, zone->query_id, zone);
(void)rbtree_delete(set->pipetree, &tp->key.node);
tp->key.num_unused--;
(void)rbtree_insert(set->pipetree, &tp->key.node);
zone->tcp_send_next = NULL;
zone->tcp_send_prev = tp->tcp_send_last;
zone->in_tcp_send = 1;
if(tp->tcp_send_last)
tp->tcp_send_last->tcp_send_next = zone;
else tp->tcp_send_first = zone;
tp->tcp_send_last = zone;
if(tp->tcp_send_first == zone) {
xfrd_tcp_setup_write_packet(tp, zone);
tcp_pipe_reset_timeout(tp);
}
}
void
xfrd_tcp_obtain(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
{
struct xfrd_tcp_pipeline* tp;
assert(zone->tcp_conn == -1);
assert(zone->tcp_waiting == 0);
if(set->tcp_count < set->tcp_max) {
int i;
assert(!set->tcp_waiting_first);
set->tcp_count ++;
for(i=0; i<set->tcp_max; i++) {
if(set->tcp_state[i]->tcp_r->fd == -1) {
zone->tcp_conn = i;
break;
}
}
if (zone->tcp_conn < 0) {
return;
}
tp = set->tcp_state[zone->tcp_conn];
zone->tcp_waiting = 0;
if(zone->zone_handler.ev_fd != -1)
xfrd_udp_release(zone);
if(!xfrd_tcp_open(set, tp, zone)) {
zone->tcp_conn = -1;
set->tcp_count --;
xfrd_set_refresh_now(zone);
return;
}
xfrd_tcp_pipeline_init(tp);
(void)rbtree_insert(set->pipetree, &tp->key.node);
xfrd_deactivate_zone(zone);
xfrd_unset_timer(zone);
pipeline_setup_new_zone(set, tp, zone);
return;
}
if((tp = pipeline_find(set, zone))!= NULL) {
int i;
if(zone->zone_handler.ev_fd != -1)
xfrd_udp_release(zone);
for(i=0; i<set->tcp_max; i++) {
if(set->tcp_state[i] == tp)
zone->tcp_conn = i;
}
xfrd_deactivate_zone(zone);
xfrd_unset_timer(zone);
pipeline_setup_new_zone(set, tp, zone);
return;
}
DEBUG(DEBUG_XFRD,2, (LOG_INFO, "xfrd: max number of tcp "
"connections (%d) reached.", set->tcp_max));
zone->tcp_waiting_next = 0;
zone->tcp_waiting_prev = set->tcp_waiting_last;
zone->tcp_waiting = 1;
if(!set->tcp_waiting_last) {
set->tcp_waiting_first = zone;
set->tcp_waiting_last = zone;
} else {
set->tcp_waiting_last->tcp_waiting_next = zone;
set->tcp_waiting_last = zone;
}
xfrd_deactivate_zone(zone);
xfrd_unset_timer(zone);
}
int
xfrd_tcp_open(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
xfrd_zone_type* zone)
{
int fd, family, conn;
struct timeval tv;
assert(zone->tcp_conn != -1);
if(!zone->master) {
zone->master = zone->zone_options->pattern->request_xfr;
zone->master_num = 0;
}
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s open tcp conn to %s",
zone->apex_str, zone->master->ip_address_spec));
tp->tcp_r->is_reading = 1;
tp->tcp_r->total_bytes = 0;
tp->tcp_r->msglen = 0;
buffer_clear(tp->tcp_r->packet);
tp->tcp_w->is_reading = 0;
tp->tcp_w->total_bytes = 0;
tp->tcp_w->msglen = 0;
tp->connection_established = 0;
if(zone->master->is_ipv6) {
#ifdef INET6
family = PF_INET6;
#else
xfrd_set_refresh_now(zone);
return 0;
#endif
} else {
family = PF_INET;
}
fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
if(fd == -1) {
if(errno != EAFNOSUPPORT || verbosity > 2)
log_msg(LOG_ERR, "xfrd: %s cannot create tcp socket: %s",
zone->master->ip_address_spec, strerror(errno));
xfrd_set_refresh_now(zone);
return 0;
}
if(fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
log_msg(LOG_ERR, "xfrd: fcntl failed: %s", strerror(errno));
close(fd);
xfrd_set_refresh_now(zone);
return 0;
}
if(xfrd->nsd->outgoing_tcp_mss > 0) {
#if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
if(setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
(void*)&xfrd->nsd->outgoing_tcp_mss,
sizeof(xfrd->nsd->outgoing_tcp_mss)) < 0) {
log_msg(LOG_ERR, "xfrd: setsockopt(TCP_MAXSEG)"
"failed: %s", strerror(errno));
}
#else
log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported");
#endif
}
tp->key.ip_len = xfrd_acl_sockaddr_to(zone->master, &tp->key.ip);
if (!xfrd_bind_local_interface(fd, zone->zone_options->pattern->
outgoing_interface, zone->master, 1)) {
close(fd);
xfrd_set_refresh_now(zone);
return 0;
}
conn = connect(fd, (struct sockaddr*)&tp->key.ip, tp->key.ip_len);
if (conn == -1 && errno != EINPROGRESS) {
log_msg(LOG_ERR, "xfrd: connect %s failed: %s",
zone->master->ip_address_spec, strerror(errno));
close(fd);
xfrd_set_refresh_now(zone);
return 0;
}
tp->tcp_r->fd = fd;
tp->tcp_w->fd = fd;
if (zone->master->tls_auth_options &&
zone->master->tls_auth_options->auth_domain_name) {
#ifdef HAVE_TLS_1_3
if (zone->master->tls_auth_options->client_cert &&
zone->master->tls_auth_options->client_key) {
if (SSL_CTX_use_certificate_chain_file(set->ssl_ctx,
zone->master->tls_auth_options->client_cert) != 1) {
log_msg(LOG_ERR, "xfrd tls: Unable to load client certificate from file %s", zone->master->tls_auth_options->client_cert);
}
if (zone->master->tls_auth_options->client_key_pw) {
SSL_CTX_set_default_passwd_cb(set->ssl_ctx, password_cb);
SSL_CTX_set_default_passwd_cb_userdata(set->ssl_ctx, zone->master->tls_auth_options->client_key_pw);
}
if (SSL_CTX_use_PrivateKey_file(set->ssl_ctx, zone->master->tls_auth_options->client_key, SSL_FILETYPE_PEM) != 1) {
log_msg(LOG_ERR, "xfrd tls: Unable to load private key from file %s", zone->master->tls_auth_options->client_key);
}
if (!SSL_CTX_check_private_key(set->ssl_ctx)) {
log_msg(LOG_ERR, "xfrd tls: Client private key from file %s does not match the certificate from file %s",
zone->master->tls_auth_options->client_key,
zone->master->tls_auth_options->client_cert);
}
}
if (!setup_ssl(tp, set, zone->master->tls_auth_options->auth_domain_name)) {
log_msg(LOG_ERR, "xfrd: Cannot setup TLS on pipeline for %s to %s",
zone->apex_str, zone->master->ip_address_spec);
close(fd);
xfrd_set_refresh_now(zone);
return 0;
}
tp->handshake_done = 0;
if(!ssl_handshake(tp)) {
if(tp->handshake_want == SSL_ERROR_SYSCALL) {
log_msg(LOG_ERR, "xfrd: TLS handshake failed "
"for %s to %s: %s", zone->apex_str,
zone->master->ip_address_spec,
strerror(errno));
} else if(tp->handshake_want == SSL_ERROR_SSL) {
char errmsg[1024];
snprintf(errmsg, sizeof(errmsg), "xfrd: "
"TLS handshake failed for %s to %s",
zone->apex_str,
zone->master->ip_address_spec);
log_crypto_err(errmsg);
} else {
log_msg(LOG_ERR, "xfrd: TLS handshake failed "
"for %s to %s with %d", zone->apex_str,
zone->master->ip_address_spec,
tp->handshake_want);
}
close(fd);
xfrd_set_refresh_now(zone);
return 0;
}
#else
log_msg(LOG_ERR, "xfrd: TLS 1.3 is not available, XFR-over-TLS is "
"not supported for %s to %s",
zone->apex_str, zone->master->ip_address_spec);
close(fd);
xfrd_set_refresh_now(zone);
return 0;
#endif
}
if(tp->handler_added)
event_del(&tp->handler);
memset(&tp->handler, 0, sizeof(tp->handler));
event_set(&tp->handler, fd, EV_PERSIST|EV_TIMEOUT|EV_READ|
#ifdef HAVE_TLS_1_3
( !tp->ssl
|| tp->handshake_done
|| tp->handshake_want == SSL_ERROR_WANT_WRITE ? EV_WRITE : 0),
#else
EV_WRITE,
#endif
xfrd_handle_tcp_pipe, tp);
if(event_base_set(xfrd->event_base, &tp->handler) != 0)
log_msg(LOG_ERR, "xfrd tcp: event_base_set failed");
tv.tv_sec = set->tcp_timeout;
tv.tv_usec = 0;
if(event_add(&tp->handler, &tv) != 0)
log_msg(LOG_ERR, "xfrd tcp: event_add failed");
tp->handler_added = 1;
return 1;
}
void
xfrd_tcp_setup_write_packet(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
{
struct xfrd_tcp* tcp = tp->tcp_w;
assert(zone->tcp_conn != -1);
assert(zone->tcp_waiting == 0);
if(zone->soa_disk_acquired == 0 || zone->master->use_axfr_only ||
zone->master->ixfr_disabled ||
(zone->state == xfrd_zone_expired && zone->round_num != 0)) {
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request full zone transfer "
"(AXFR) for %s to %s",
zone->apex_str, zone->master->ip_address_spec));
VERBOSITY(3, (LOG_INFO, "request full zone transfer "
"(AXFR) for %s to %s",
zone->apex_str, zone->master->ip_address_spec));
xfrd_setup_packet(tcp->packet, TYPE_AXFR, CLASS_IN, zone->apex,
zone->query_id, NULL);
xfrd_prepare_zone_xfr(zone, TYPE_AXFR);
} else {
int apex_compress = 0;
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "request incremental zone "
"transfer (IXFR) for %s to %s",
zone->apex_str, zone->master->ip_address_spec));
VERBOSITY(3, (LOG_INFO, "request incremental zone "
"transfer (IXFR) for %s to %s",
zone->apex_str, zone->master->ip_address_spec));
xfrd_setup_packet(tcp->packet, TYPE_IXFR, CLASS_IN, zone->apex,
zone->query_id, &apex_compress);
xfrd_prepare_zone_xfr(zone, TYPE_IXFR);
NSCOUNT_SET(tcp->packet, 1);
xfrd_write_soa_buffer(tcp->packet, zone->apex, &zone->soa_disk,
apex_compress);
}
if(zone->master->key_options && zone->master->key_options->tsig_key) {
xfrd_tsig_sign_request(
tcp->packet, &zone->latest_xfr->tsig, zone->master);
}
buffer_flip(tcp->packet);
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "sent tcp query with ID %d", zone->query_id));
tcp->msglen = buffer_limit(tcp->packet);
tcp->total_bytes = 0;
}
static void
tcp_conn_ready_for_reading(struct xfrd_tcp* tcp)
{
tcp->total_bytes = 0;
tcp->msglen = 0;
buffer_clear(tcp->packet);
}
#ifdef HAVE_TLS_1_3
static int
conn_write_ssl(struct xfrd_tcp* tcp, SSL* ssl)
{
int request_length;
ssize_t sent;
if(tcp->total_bytes < sizeof(tcp->msglen)) {
uint16_t sendlen = htons(tcp->msglen);
request_length = sizeof(tcp->msglen) - tcp->total_bytes;
ERR_clear_error();
sent = SSL_write(ssl, (const char*)&sendlen + tcp->total_bytes,
request_length);
switch(SSL_get_error(ssl,sent)) {
case SSL_ERROR_NONE:
break;
default:
log_msg(LOG_ERR, "xfrd: generic write problem with tls");
}
if(sent == -1) {
if(errno == EAGAIN || errno == EINTR) {
return 0;
} else {
return -1;
}
}
tcp->total_bytes += sent;
if(sent > (ssize_t)sizeof(tcp->msglen))
buffer_skip(tcp->packet, sent-sizeof(tcp->msglen));
if(tcp->total_bytes < sizeof(tcp->msglen)) {
return 0;
}
assert(tcp->total_bytes >= sizeof(tcp->msglen));
}
assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));
request_length = buffer_remaining(tcp->packet);
ERR_clear_error();
sent = SSL_write(ssl, buffer_current(tcp->packet), request_length);
switch(SSL_get_error(ssl,sent)) {
case SSL_ERROR_NONE:
break;
default:
log_msg(LOG_ERR, "xfrd: generic write problem with tls");
}
if(sent == -1) {
if(errno == EAGAIN || errno == EINTR) {
return 0;
} else {
return -1;
}
}
buffer_skip(tcp->packet, sent);
tcp->total_bytes += sent;
if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
return 0;
}
assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
return 1;
}
#endif
int conn_write(struct xfrd_tcp* tcp)
{
ssize_t sent;
if(tcp->total_bytes < sizeof(tcp->msglen)) {
uint16_t sendlen = htons(tcp->msglen);
#ifdef HAVE_WRITEV
struct iovec iov[2];
iov[0].iov_base = (uint8_t*)&sendlen + tcp->total_bytes;
iov[0].iov_len = sizeof(sendlen) - tcp->total_bytes;
iov[1].iov_base = buffer_begin(tcp->packet);
iov[1].iov_len = buffer_limit(tcp->packet);
sent = writev(tcp->fd, iov, 2);
#else
sent = write(tcp->fd,
(const char*)&sendlen + tcp->total_bytes,
sizeof(tcp->msglen) - tcp->total_bytes);
#endif
if(sent == -1) {
if(errno == EAGAIN || errno == EINTR) {
return 0;
} else {
return -1;
}
}
tcp->total_bytes += sent;
if(sent > (ssize_t)sizeof(tcp->msglen))
buffer_skip(tcp->packet, sent-sizeof(tcp->msglen));
if(tcp->total_bytes < sizeof(tcp->msglen)) {
return 0;
}
#ifdef HAVE_WRITEV
if(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen)) {
return 1;
}
#endif
assert(tcp->total_bytes >= sizeof(tcp->msglen));
}
assert(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen));
sent = write(tcp->fd,
buffer_current(tcp->packet),
buffer_remaining(tcp->packet));
if(sent == -1) {
if(errno == EAGAIN || errno == EINTR) {
return 0;
} else {
return -1;
}
}
buffer_skip(tcp->packet, sent);
tcp->total_bytes += sent;
if(tcp->total_bytes < tcp->msglen + sizeof(tcp->msglen)) {
return 0;
}
assert(tcp->total_bytes == tcp->msglen + sizeof(tcp->msglen));
return 1;
}
void
xfrd_tcp_write(struct xfrd_tcp_pipeline* tp, xfrd_zone_type* zone)
{
int ret;
struct xfrd_tcp* tcp = tp->tcp_w;
assert(zone->tcp_conn != -1);
assert(zone == tp->tcp_send_first);
if(!tp->connection_established) {
int error = 0;
socklen_t len = sizeof(error);
if(getsockopt(tcp->fd, SOL_SOCKET, SO_ERROR, &error, &len) < 0){
error = errno;
}
if(error == EINPROGRESS || error == EWOULDBLOCK)
return;
if(error != 0) {
log_msg(LOG_ERR, "%s: Could not tcp connect to %s: %s",
zone->apex_str, zone->master->ip_address_spec,
strerror(error));
xfrd_tcp_pipe_stop(tp);
return;
}
}
#ifdef HAVE_TLS_1_3
if (tp->ssl) {
if(tp->handshake_done) {
ret = conn_write_ssl(tcp, tp->ssl);
} else if(ssl_handshake(tp)) {
tcp_pipe_reset_timeout(tp);
return;
} else {
if(tp->handshake_want == SSL_ERROR_SYSCALL) {
log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
strerror(errno));
} else if(tp->handshake_want == SSL_ERROR_SSL) {
log_crypto_err("xfrd: TLS handshake failed");
} else {
log_msg(LOG_ERR, "xfrd: TLS handshake failed "
"with value: %d", tp->handshake_want);
}
xfrd_tcp_pipe_stop(tp);
return;
}
} else
#endif
ret = conn_write(tcp);
if(ret == -1) {
log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
xfrd_tcp_pipe_stop(tp);
return;
}
if(tcp->total_bytes != 0 && !tp->connection_established)
tp->connection_established = 1;
if(ret == 0) {
return;
}
tcp_pipe_sendlist_popfirst(tp, zone);
while(tp->tcp_send_first) {
xfrd_tcp_setup_write_packet(tp, tp->tcp_send_first);
#ifdef HAVE_TLS_1_3
if (tp->ssl)
ret = conn_write_ssl(tcp, tp->ssl);
else
#endif
ret = conn_write(tcp);
if(ret == -1) {
log_msg(LOG_ERR, "xfrd: failed writing tcp %s", strerror(errno));
xfrd_tcp_pipe_stop(tp);
return;
}
if(ret == 0)
return;
tcp_pipe_sendlist_popfirst(tp, tp->tcp_send_first);
}
assert(tp->tcp_send_first == NULL);
tcp_pipe_reset_timeout(tp);
}
#ifdef HAVE_TLS_1_3
static int
conn_read_ssl(struct xfrd_tcp* tcp, SSL* ssl)
{
ssize_t received;
if(tcp->total_bytes < sizeof(tcp->msglen)) {
ERR_clear_error();
received = SSL_read(ssl,
(char*) &tcp->msglen + tcp->total_bytes,
sizeof(tcp->msglen) - tcp->total_bytes);
if (received <= 0) {
int err = SSL_get_error(ssl, received);
if(err == SSL_ERROR_WANT_READ && errno == EAGAIN) {
return 0;
}
if(err == SSL_ERROR_ZERO_RETURN) {
return -1;
}
if(err == SSL_ERROR_SYSCALL)
log_msg(LOG_ERR, "ssl_read returned error SSL_ERROR_SYSCALL with received %zd: %s", received, strerror(errno));
else
log_msg(LOG_ERR, "ssl_read returned error %d with received %zd", err, received);
}
if(received == -1) {
if(errno == EAGAIN || errno == EINTR) {
return 0;
} else {
#ifdef ECONNRESET
if (verbosity >= 2 || errno != ECONNRESET)
#endif
log_msg(LOG_ERR, "tls read sz: %s", strerror(errno));
return -1;
}
} else if(received == 0) {
return -1;
}
tcp->total_bytes += received;
if(tcp->total_bytes < sizeof(tcp->msglen)) {
return 0;
}
assert(tcp->total_bytes == sizeof(tcp->msglen));
tcp->msglen = ntohs(tcp->msglen);
if(tcp->msglen == 0) {
buffer_set_limit(tcp->packet, tcp->msglen);
return 1;
}
if(tcp->msglen > buffer_capacity(tcp->packet)) {
log_msg(LOG_ERR, "buffer too small, dropping connection");
return 0;
}
buffer_set_limit(tcp->packet, tcp->msglen);
}
assert(buffer_remaining(tcp->packet) > 0);
ERR_clear_error();
received = SSL_read(ssl, buffer_current(tcp->packet),
buffer_remaining(tcp->packet));
if (received <= 0) {
int err = SSL_get_error(ssl, received);
if(err == SSL_ERROR_ZERO_RETURN) {
return -1;
}
if(err == SSL_ERROR_SYSCALL)
log_msg(LOG_ERR, "ssl_read returned error SSL_ERROR_SYSCALL with received %zd: %s", received, strerror(errno));
else
log_msg(LOG_ERR, "ssl_read returned error %d with received %zd", err, received);
}
if(received == -1) {
if(errno == EAGAIN || errno == EINTR) {
return 0;
} else {
#ifdef ECONNRESET
if (verbosity >= 2 || errno != ECONNRESET)
#endif
log_msg(LOG_ERR, "tcp read %s", strerror(errno));
return -1;
}
} else if(received == 0) {
return -1;
}
tcp->total_bytes += received;
buffer_skip(tcp->packet, received);
if(buffer_remaining(tcp->packet) > 0) {
return 0;
}
assert(buffer_position(tcp->packet) == tcp->msglen);
return 1;
}
#endif
int
conn_read(struct xfrd_tcp* tcp)
{
ssize_t received;
if(tcp->total_bytes < sizeof(tcp->msglen)) {
received = read(tcp->fd,
(char*) &tcp->msglen + tcp->total_bytes,
sizeof(tcp->msglen) - tcp->total_bytes);
if(received == -1) {
if(errno == EAGAIN || errno == EINTR) {
return 0;
} else {
#ifdef ECONNRESET
if (verbosity >= 2 || errno != ECONNRESET)
#endif
log_msg(LOG_ERR, "tcp read sz: %s", strerror(errno));
return -1;
}
} else if(received == 0) {
return -1;
}
tcp->total_bytes += received;
if(tcp->total_bytes < sizeof(tcp->msglen)) {
return 0;
}
assert(tcp->total_bytes == sizeof(tcp->msglen));
tcp->msglen = ntohs(tcp->msglen);
if(tcp->msglen == 0) {
buffer_set_limit(tcp->packet, tcp->msglen);
return 1;
}
if(tcp->msglen > buffer_capacity(tcp->packet)) {
log_msg(LOG_ERR, "buffer too small, dropping connection");
return 0;
}
buffer_set_limit(tcp->packet, tcp->msglen);
}
assert(buffer_remaining(tcp->packet) > 0);
received = read(tcp->fd, buffer_current(tcp->packet),
buffer_remaining(tcp->packet));
if(received == -1) {
if(errno == EAGAIN || errno == EINTR) {
return 0;
} else {
#ifdef ECONNRESET
if (verbosity >= 2 || errno != ECONNRESET)
#endif
log_msg(LOG_ERR, "tcp read %s", strerror(errno));
return -1;
}
} else if(received == 0) {
return -1;
}
tcp->total_bytes += received;
buffer_skip(tcp->packet, received);
if(buffer_remaining(tcp->packet) > 0) {
return 0;
}
assert(buffer_position(tcp->packet) == tcp->msglen);
return 1;
}
void
xfrd_tcp_read(struct xfrd_tcp_pipeline* tp)
{
xfrd_zone_type* zone;
struct xfrd_tcp* tcp = tp->tcp_r;
int ret;
enum xfrd_packet_result pkt_result;
#ifdef HAVE_TLS_1_3
if(tp->ssl) {
if(tp->handshake_done) {
ret = conn_read_ssl(tcp, tp->ssl);
} else if(ssl_handshake(tp)) {
tcp_pipe_reset_timeout(tp);
return;
} else {
if(tp->handshake_want == SSL_ERROR_SYSCALL) {
log_msg(LOG_ERR, "xfrd: TLS handshake failed: %s",
strerror(errno));
} else if(tp->handshake_want == SSL_ERROR_SSL) {
log_crypto_err("xfrd: TLS handshake failed");
} else {
log_msg(LOG_ERR, "xfrd: TLS handshake failed "
"with value: %d", tp->handshake_want);
}
xfrd_tcp_pipe_stop(tp);
return;
}
} else
#endif
ret = conn_read(tcp);
if(ret == -1) {
if(errno != 0)
log_msg(LOG_ERR, "xfrd: failed reading tcp %s", strerror(errno));
else
log_msg(LOG_ERR, "xfrd: failed reading tcp: closed");
xfrd_tcp_pipe_stop(tp);
return;
}
if(ret == 0)
return;
buffer_flip(tcp->packet);
if(tcp->msglen < QHEADERSZ) {
DEBUG(DEBUG_XFRD,1, (LOG_INFO,
"xfrd: tcp skip response that is too short"));
tcp_conn_ready_for_reading(tcp);
return;
}
zone = xfrd_tcp_pipeline_lookup_id(tp, ID(tcp->packet));
if(!zone || zone == TCP_NULL_SKIP) {
DEBUG(DEBUG_XFRD,1, (LOG_INFO,
"xfrd: tcp skip response with %s ID",
zone?"set-to-skip":"unknown"));
tcp_conn_ready_for_reading(tcp);
return;
}
assert(zone->tcp_conn != -1);
pkt_result = xfrd_handle_received_xfr_packet(zone, tcp->packet);
tcp_conn_ready_for_reading(tcp);
switch(pkt_result) {
case xfrd_packet_more:
break;
case xfrd_packet_newlease:
xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
tp->key.num_skip++;
case xfrd_packet_transfer:
if(zone->zone_options->pattern->multi_primary_check) {
xfrd_tcp_release(xfrd->tcp_set, zone);
xfrd_make_request(zone);
break;
}
xfrd_tcp_release(xfrd->tcp_set, zone);
assert(zone->round_num == -1);
break;
case xfrd_packet_notimpl:
xfrd_disable_ixfr(zone);
xfrd_tcp_release(xfrd->tcp_set, zone);
xfrd_make_request(zone);
break;
case xfrd_packet_bad:
case xfrd_packet_tcp:
default:
xfrd_tcp_pipeline_skip_id(tp, zone->query_id);
tp->key.num_skip++;
xfrd_tcp_release(xfrd->tcp_set, zone);
xfrd_make_request(zone);
break;
}
}
void
xfrd_tcp_release(struct xfrd_tcp_set* set, xfrd_zone_type* zone)
{
int conn = zone->tcp_conn;
struct xfrd_tcp_pipeline* tp = set->tcp_state[conn];
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: zone %s released tcp conn to %s",
zone->apex_str, zone->master->ip_address_spec));
assert(zone->tcp_conn != -1);
assert(zone->tcp_waiting == 0);
zone->tcp_conn = -1;
zone->tcp_waiting = 0;
tcp_pipe_sendlist_remove(tp, zone);
if(xfrd_tcp_pipeline_lookup_id(tp, zone->query_id) != TCP_NULL_SKIP)
tcp_pipe_id_remove(tp, zone, 1);
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: released tcp pipe now %d unused",
tp->key.num_unused));
if(tp->key.num_unused == 1 && set->tcp_waiting_first) {
#ifdef INET6
struct sockaddr_storage to;
#else
struct sockaddr_in to;
#endif
socklen_t to_len = xfrd_acl_sockaddr_to(
set->tcp_waiting_first->master, &to);
if(to_len == tp->key.ip_len && memcmp(&to, &tp->key.ip, to_len) == 0) {
zone = set->tcp_waiting_first;
assert(zone->tcp_conn == -1);
zone->tcp_conn = conn;
tcp_zone_waiting_list_popfirst(set, zone);
if(zone->zone_handler.ev_fd != -1)
xfrd_udp_release(zone);
xfrd_unset_timer(zone);
pipeline_setup_new_zone(set, tp, zone);
return;
}
}
if(tp->key.num_unused >= tp->pipe_num || tp->key.num_skip >= tp->pipe_num - tp->key.num_unused)
xfrd_tcp_pipe_release(set, tp, conn);
}
void
xfrd_tcp_pipe_release(struct xfrd_tcp_set* set, struct xfrd_tcp_pipeline* tp,
int conn)
{
DEBUG(DEBUG_XFRD,1, (LOG_INFO, "xfrd: tcp pipe released"));
if(tp->handler_added)
event_del(&tp->handler);
tp->handler_added = 0;
#ifdef HAVE_TLS_1_3
if (tp->ssl) {
DEBUG(DEBUG_XFRD, 1, (LOG_INFO, "xfrd: Shutting down TLS"));
SSL_shutdown(tp->ssl);
SSL_free(tp->ssl);
tp->ssl = NULL;
}
#endif
if(tp->tcp_r->fd != -1)
close(tp->tcp_r->fd);
tp->tcp_r->fd = -1;
tp->tcp_w->fd = -1;
(void)rbtree_delete(xfrd->tcp_set->pipetree, &tp->key.node);
while(set->tcp_count == set->tcp_max && set->tcp_waiting_first) {
xfrd_zone_type* zone = set->tcp_waiting_first;
assert(zone->tcp_conn == -1);
zone->tcp_conn = conn;
tcp_zone_waiting_list_popfirst(set, zone);
if(zone->zone_handler.ev_fd != -1)
xfrd_udp_release(zone);
if(!xfrd_tcp_open(set, tp, zone)) {
zone->tcp_conn = -1;
xfrd_set_refresh_now(zone);
continue;
}
xfrd_tcp_pipeline_init(tp);
(void)rbtree_insert(set->pipetree, &tp->key.node);
xfrd_unset_timer(zone);
pipeline_setup_new_zone(set, tp, zone);
return;
}
assert(!set->tcp_waiting_first);
set->tcp_count --;
assert(set->tcp_count >= 0);
}