Commit d2d47b86 authored by Smit Gangurde's avatar Smit Gangurde

RAMCloud Offload, Smit MTP

parent 33075e8b
*.gch
*.out
*.o
*code-workspace
.vscode
.*~
\ No newline at end of file
// Include guard for the debug/error-message helper header.
#ifndef ERR_MSSG_H
#define ERR_MSSG_H
// D(x): executes the statement `x` once inside a do { ... } while(0) wrapper,
// so that `D(stmt);` behaves as a single statement (e.g. as the body of an
// unbraced `if`).
#define D(x) do{x;}while(0)
#endif
\ No newline at end of file
TRANSPORT_TYPE=RDMA_RC
NUM_THREADS=8
\ No newline at end of file
TRANSPORT_TYPE=TCP
NUM_THREADS=8
\ No newline at end of file
###############_Work in Progress_###############
Things to look out for: the CPU speed difference between the host and the NIC can cause issues when programming with RDMA.
Current issue: memory leak when a connection exits.
The program keeps creating buffers for messages from dead connections (open question: why does select() even return dead connections?).
Fix to be implemented: clean closing of connections.
Compilation commands:
NIC:
g++ -g integrated_nic.cc include/common.cc transport_api/transport_config.cc config/read_config.cc include/connection_pool.cc include/thread_pool.cc include/threadsafe_queue.cc include/log.cc include/thread_functions.cc include/dispatcher.cc include/client_functions.cc include/hash.cc include/Buffer.cc include/cli_api.cc -libverbs -lpthread -mcmodel=small
SERVER:
g++ -g integrated_server.cc include/common.cc transport_api/transport_config.cc config/read_config.cc include/connection_pool.cc include/thread_pool.cc include/threadsafe_queue.cc include/log.cc include/thread_functions.cc include/cli_api.cc include/dispatcher.cc include/client_functions.cc include/hash.cc include/Buffer.cc -libverbs -lpthread -mcmodel=medium
CLIENT:
g++ -g threaded_client1.cc include/common.cc transport_api/transport_config.cc config/read_config.cc include/connection_pool.cc include/thread_pool.cc include/threadsafe_queue.cc include/log.cc include/thread_functions.cc include/dispatcher.cc include/client_functions.cc include/cli_api.cc include/hash.cc include/Buffer.cc -libverbs -lpthread -mcmodel=medium
/**
 * Placeholder for the replica-management component (work in progress).
 * Declares no members yet.
 */
class ReplicaManager {
public:
private:
};
\ No newline at end of file
#include <iostream>
#include <sys/time.h>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
// File-level settings for the RDMA client in this file.
// MODE is not referenced by main() below.
const short int MODE = 0;
// Peer addresses; main() only mentions SERVER_NIC inside a commented-out line.
// NOTE(review): binding a string literal to a non-const `char*` is deprecated
// in C++ -- `const char*` would be safer if the consumers accept it.
char *SERVER_HOST = "192.168.200.20";
char *SERVER_NIC = "192.168.200.21";
// Stored into base->port by main().
const int NIC_PORT = 8090;
// Not referenced by main() below.
const double err_fraction = 0.5;
// Device number handed to open_dev() by main().
const short int dev_num = 0;
int main(int argc, char * argv[]) {
char *temp = (char *) malloc(128);
struct resource_base *base;
base = (struct resource_base*) malloc(sizeof(struct resource_base));
init_resources(base);
//base->server_name = SERVER_NIC;
base->ib_port = IB_PORT;
base->gid_idx = GID_IDX;
base->port = NIC_PORT;
open_dev(base, dev_num);
allocate_pd(base);
register_mr(base);
init_cq(base);
init_qp(base);
struct ibv_port_attr port_attr;
if(ibv_query_port(base->ctx, base->ib_port, &port_attr))
D(err_msg("ibv_query_gid", true, base));
if(port_attr.state != IBV_PORT_ACTIVE)
D(err_msg("IB PORT NOT ACTIVE", true, base));
base->port_attr = &port_attr;
union ibv_gid my_gid;
if(ibv_query_gid(base->ctx, base->ib_port, base->gid_idx, &my_gid))
D(err_msg("ibv_query_gid", true, base));
memcpy(base->local_conn->gid, &my_gid, 16);
connect_qp(base);
strcpy(base->mr_buf_addr, "yo");
sync_remote_qp(base, "R", temp, 1);
post_send(base, IBV_WR_RDMA_WRITE);
//base->mr_buf_addr = (char *) malloc(base->mr_size);
//strcpy(base->mr_buf_addr, "Hi from client\0");
//sock_connect(base);
//sync_remote_qp(base, base->mr_buf_addr, temp, 15);
union object test_obj;
union object ret_obj;
memset(&test_obj, 0, sizeof(test_obj));
memset(&ret_obj, 0, sizeof(ret_obj));
test_obj.obj.key = 1;
test_obj.obj.value[0] = 'T';
test_obj.obj.version = 1;
test_obj.obj.status = STATUS_OK;
struct timeval temp_time;
double snd_ts, rcvd_ts;
double avg = 0;
int err_cnt = 0;
int succ_cnt = 0;
// for(int i=0; i<1000; i++) {
// gettimeofday(&temp_time, NULL);
// //time in ms
// snd_ts = ((double)temp_time.tv_sec*1000.0) + ((double)temp_time.tv_usec/1000.0);
// test_obj.obj.send_ts = snd_ts;
// sync_remote_qp(base, "W", temp, 1);
// sync_remote_qp(base, (char *)&test_obj, (char *)&ret_obj, sizeof(test_obj));
// gettimeofday(&temp_time, NULL);
// rcvd_ts = ((double)temp_time.tv_sec*1000.0) + ((double)temp_time.tv_usec/1000.0);
// avg += (rcvd_ts - snd_ts);
// }
enum Status ret_status;
for(int i=0; i<cache_meta_size; i++) {
test_obj.obj.key = i;
gettimeofday(&temp_time, NULL);
snd_ts = ((double)temp_time.tv_sec*1000.0) + ((double)temp_time.tv_usec/1000.0);
test_obj.obj.send_ts = snd_ts;
//send_and_check(base, (char *)&test_obj, (char *)&ret_status, sizeof(test_obj), sizeof(ret_status));
memcpy((void*)base->mr_buf_addr, (void*)&test_obj, sizeof(test_obj));
send_obj(base, "R", 1);
sync_remote_qp(base, "T", temp, 1);
read_obj(base, (char *)&ret_status, sizeof(ret_status));
gettimeofday(&temp_time, NULL);
rcvd_ts = ((double)temp_time.tv_sec*1000.0) + ((double)temp_time.tv_usec/1000.0);
avg += (rcvd_ts - snd_ts);
if(ret_status == STATUS_WRONG_VERSION) err_cnt++;
else succ_cnt++;
}
std::cout<<"Errored requests: "<<err_cnt<<std::endl;
std::cout<<"Successful requests: "<<succ_cnt<<std::endl;
std::cout<<"Avg. RTT: "<<avg/(double)cache_meta_size<<" ms"<<std::endl;
// if(ret_obj.obj.status == STATUS_WRONG_VERSION)
// std::cout<<"Returned with wrong version status"<<std::endl;
cleanup(base);
return 0;
}
DEBUG=TRUE
ANALYZE=TRUE
INTERACTIVE_MODE=FALSE
TRANSPORT_TYPE=RDMA_RC
NUM_THREADS=0
CONN_PORT=8888
MAX_PACKET_SIZE_MBYTES=4
RDMA_MR_SIZE_MBYTES=1
RDMA_MTU_SIZE_BYTES=512
RDMA_MIN_RNR_TIMER=12
RDMA_TIMEOUT=12
RDMA_CQ_POLL_TIMEOUT_MS=5
RDMA_RETRY_CNT=4
RDMA_IB_PORT=1
RDMA_GID_IDX=1
\ No newline at end of file
#ifndef __CONFIG_PARAMETERS_H__
#define __CONFIG_PARAMETERS_H__
#include <unordered_map>
#include <string>
#include "../transport_api/transport_config.hpp"
// Identifier for each configuration key; the value of each KEY=VALUE line in
// a config file is dispatched on one of these enumerators.
// The enumerator order is the index order of param_strs[] below.
// NOTE(review): these are unscoped names; a build that defines a macro with
// the same spelling (e.g. -DDEBUG) would break this declaration.
enum params {
DEBUG,
ANALYZE,
INTERACTIVE_MODE,
TRANSPORT_TYPE,
NUM_THREADS,
CONN_PORT,
NUM_REPLICAS,
// Packet size may be given in bytes or in megabytes.
MAX_PACKET_SIZE_BYTES,
MAX_PACKET_SIZE_MBYTES,
// Memory-region size may be given in bytes or in megabytes.
RDMA_MR_SIZE_BYTES,
RDMA_MR_SIZE_MBYTES, //max 8
RDMA_MTU_SIZE_BYTES, //one of 256,512,1024,2048,4096
RDMA_MIN_RNR_TIMER,
RDMA_TIMEOUT,
RDMA_CQ_POLL_TIMEOUT_MS,
RDMA_RETRY_CNT,
RDMA_IB_PORT,
RDMA_GID_IDX,
ARRIVAL_RATE
};
// Printable name of every configuration key, in the same order as the
// enumerators of `enum params` (param_strs[0] is "DEBUG", and so on).
// Each string must match the key spelling accepted in config files.
const std::string param_strs[] = {
    "DEBUG",
    "ANALYZE",
    "INTERACTIVE_MODE",
    "TRANSPORT_TYPE",
    "NUM_THREADS",
    "CONN_PORT",
    "NUM_REPLICAS",
    "MAX_PACKET_SIZE_BYTES",
    "MAX_PACKET_SIZE_MBYTES",
    "RDMA_MR_SIZE_BYTES",
    "RDMA_MR_SIZE_MBYTES",
    "RDMA_MTU_SIZE_BYTES",
    "RDMA_MIN_RNR_TIMER",
    "RDMA_TIMEOUT",
    "RDMA_CQ_POLL_TIMEOUT_MS",
    "RDMA_RETRY_CNT",
    "RDMA_IB_PORT",
    "RDMA_GID_IDX",   // was "RDMA_GID_PORT", which matches no enumerator or config key
    "ARRIVAL_RATE"
};
// Number of entries in param_strs. Derived from the array so it cannot drift
// out of sync again (it used to be hard-coded as 17 for 19 entries).
const int num_params = sizeof(param_strs) / sizeof(param_strs[0]);
#endif
\ No newline at end of file
DEBUG=TRUE
ANALYZE=TRUE
TRANSPORT_TYPE=RDMA_RC
NUM_THREADS=3
CONN_PORT=8888
MAX_PACKET_SIZE_MBYTES=4
RDMA_MR_SIZE_MBYTES=1
RDMA_MTU_SIZE_BYTES=512
RDMA_MIN_RNR_TIMER=12
RDMA_TIMEOUT=12
RDMA_CQ_POLL_TIMEOUT_MS=5
RDMA_RETRY_CNT=4
RDMA_IB_PORT=1
RDMA_GID_IDX=1
\ No newline at end of file
#ifndef __READ_CONFIG_CC__
#define __READ_CONFIG_CC__
#include <chrono>
#include <iostream>
#include <string>
#include <iterator>
#include <vector>
#include <fstream>
#include <infiniband/verbs.h>
#include "../transport_api/transport_config.hpp"
#include "read_config.hpp"
// Maps the textual value of the TRANSPORT_TYPE config key to the transport
// enumerator. Looked up with operator[] in Params::fill_params(), so a value
// that is not listed here gets inserted with a value-initialised enumerator.
std::unordered_map<std::string, enum Transport_Type> transport_type_map = {
{"TCP", TCP_IP_TRANSPORT},
{"UDP", UDP_TRANSPORT},
{"RDMA_RC", RDMA_RC_TRANSPORT},
{"RDMA_UC", RDMA_UC_TRANSPORT},
};
// Maps a config-file key (left-hand side of KEY=VALUE) to its `enum params`
// identifier. Params::fill_params() ignores any key that is not listed here,
// so a misspelled key in a config file is skipped without a diagnostic.
std::unordered_map<std::string, enum params> param_map = {
{"DEBUG", DEBUG},
{"ANALYZE", ANALYZE},
{"INTERACTIVE_MODE", INTERACTIVE_MODE},
{"TRANSPORT_TYPE", TRANSPORT_TYPE},
{"NUM_THREADS", NUM_THREADS},
{"CONN_PORT", CONN_PORT},
{"NUM_REPLICAS", NUM_REPLICAS},
{"MAX_PACKET_SIZE_BYTES", MAX_PACKET_SIZE_BYTES},
{"MAX_PACKET_SIZE_MBYTES", MAX_PACKET_SIZE_MBYTES},
{"RDMA_MR_SIZE_BYTES", RDMA_MR_SIZE_BYTES},
{"RDMA_MR_SIZE_MBYTES", RDMA_MR_SIZE_MBYTES},
{"RDMA_MTU_SIZE_BYTES", RDMA_MTU_SIZE_BYTES},
{"RDMA_MIN_RNR_TIMER", RDMA_MIN_RNR_TIMER},
{"RDMA_TIMEOUT", RDMA_TIMEOUT},
{"RDMA_CQ_POLL_TIMEOUT_MS", RDMA_CQ_POLL_TIMEOUT_MS},
{"RDMA_RETRY_CNT", RDMA_RETRY_CNT},
{"RDMA_IB_PORT", RDMA_IB_PORT},
{"RDMA_GID_IDX", RDMA_GID_IDX},
{"ARRIVAL_RATE", ARRIVAL_RATE},
};
/**
 * Returns the first whitespace-delimited token of `str`.
 *
 * Leading whitespace is skipped and the result ends at the next whitespace
 * character (or at the end of the string). Besides ' ', tabs and line-ending
 * characters are treated as whitespace, so a value read from a CRLF file
 * ("TRUE\r") or one indented with tabs compares equal to the bare token.
 *
 * @param str text to trim
 * @return the first token, or an empty string if `str` holds only whitespace
 */
std::string strip_whitespaces(std::string str) {
    static const char kWhitespace[] = " \t\r\n\v\f";
    const std::string::size_type first = str.find_first_not_of(kWhitespace);
    if(first == std::string::npos) {
        return std::string();
    }
    const std::string::size_type last = str.find_first_of(kWhitespace, first);
    if(last == std::string::npos) {
        return str.substr(first);
    }
    return str.substr(first, last - first);
}
// Default constructor: no config file name. Delegates to
// Params(std::string) so that every parameter starts at the same defaults
// instead of being left uninitialised.
Params::Params() : Params(std::string()) {
}
Params::Params(std::string f) {
this->filename = f;
this->debug = false;
this->analyze = false;
this->interactive_mode = true;
this->transport_type = TCP_IP_TRANSPORT;
this->num_threads = 0;
this->conn_port = 8080;
this->num_replicas = 0;
this->max_packet_size_bytes = 0;
this->rdma_mr_size_bytes = 0;
this->rdma_mtu_size_bytes = IBV_MTU_512;
this->rdma_min_rnr_timer = 0;
this->rdma_timeout = 0;
this->rdma_cq_poll_timeout_ms = std::chrono::duration<double>{0.0};
this->rdma_retry_cnt = 0;
this->rdma_ib_port = 0;
this->rdma_gid_idx = 0;
this->arrival_rate = 0.0;
}
//debug functions
void Params::print_map() {
for(auto x: this->param_val_map) {
std::cout<<x.first<<" : "<<x.second<<std::endl;
}
return;
}
// Debug helper: prints every parsed parameter, one per line, on std::cout.
// Unlike the earlier version it also prints the RDMA retry count, and an
// unexpected enum value prints "UNKNOWN" so the line is always terminated.
void Params::print_vals() {
    std::cout<<"DEBUG: "<<(this->debug ? "TRUE" : "FALSE")<<std::endl;
    std::cout<<"ANALYZE: "<<(this->analyze ? "TRUE" : "FALSE")<<std::endl;
    std::cout<<"MODE: "<<(this->interactive_mode ? "INTERACTIVE MODE" : "BATCH MODE")<<std::endl;

    std::cout<<"Transport: ";
    switch(this->transport_type) {
        case TCP_IP_TRANSPORT:
            std::cout<<"TCP"<<std::endl;
            break;
        case UDP_TRANSPORT:
            std::cout<<"UDP"<<std::endl;
            break;
        case RDMA_RC_TRANSPORT:
            std::cout<<"RDMA RC"<<std::endl;
            break;
        case RDMA_UC_TRANSPORT:
            std::cout<<"RDMA UC"<<std::endl;
            break;
        default:
            std::cout<<"UNKNOWN"<<std::endl;
            break;
    }

    std::cout<<"Num thread: "<<this->num_threads<<std::endl;
    std::cout<<"Conn Port: "<<this->conn_port<<std::endl;
    std::cout<<"Num Replicas: "<<this->num_replicas<<std::endl;
    std::cout<<"Max Packet Size (Bytes): "<<this->max_packet_size_bytes<<std::endl;
    std::cout<<"RDMA MR size (Bytes): "<<this->rdma_mr_size_bytes<<std::endl;

    std::cout<<"RDMA MTU size (Bytes): ";
    switch (this->rdma_mtu_size_bytes) {
        case IBV_MTU_256:
            std::cout<<"256B"<<std::endl;
            break;
        case IBV_MTU_512:
            std::cout<<"512B"<<std::endl;
            break;
        case IBV_MTU_1024:
            std::cout<<"1024B"<<std::endl;
            break;
        case IBV_MTU_2048:
            std::cout<<"2048B"<<std::endl;
            break;
        case IBV_MTU_4096:
            std::cout<<"4096B"<<std::endl;
            break;
        default:
            std::cout<<"UNKNOWN"<<std::endl;
            break;
    }

    std::cout<<"RDMA min rnr timer: "<<this->rdma_min_rnr_timer<<std::endl;
    std::cout<<"RDMA timeout: "<<this->rdma_timeout<<std::endl;
    // The stored duration counts seconds (std::chrono::duration<double>).
    std::cout<<"RDMA CQ poll timeout: "<<this->rdma_cq_poll_timeout_ms.count()<<std::endl;
    std::cout<<"RDMA retry cnt: "<<this->rdma_retry_cnt<<std::endl;
    std::cout<<"RDMA IB Port: "<<this->rdma_ib_port<<std::endl;
    std::cout<<"RDMA GID IDX: "<<this->rdma_gid_idx<<std::endl;
    std::cout<<"ARRIVAL RATE: "<<this->arrival_rate<<" requests/second"<<std::endl;
}
// Reads the config file named by `filename` and stores every KEY=VALUE line
// in the internal map (raw text, not yet trimmed or interpreted).
// Empty lines, lines starting with "//" and lines without '=' are skipped.
// If the file cannot be opened a message is written to std::cerr and the map
// is left unchanged.
void Params::read_config_file() {
    std::ifstream config_file(this->filename);
    if(!config_file.is_open()) {
        std::cerr<<"Could not open config file: "<<this->filename<<std::endl;
        return;
    }
    std::string line;
    while(std::getline(config_file, line)) {
        // getline() strips the '\n', so a blank line arrives as an empty string.
        if(line.empty() || line.compare(0, 2, "//") == 0) continue;
        // size_type (not int) so the comparison with npos is exact.
        const std::string::size_type pos = line.find('=');
        if(pos == std::string::npos) continue;
        this->param_val_map[line.substr(0, pos)] = line.substr(pos + 1);
    }
}
// Fills parameter variables
// using internal parameter map
void Params::fill_params() {
std::string t1, t2;
double tmp;
enum params param_type;
if(this->param_val_map.empty()) {
//error
return;
}
for(auto p: this->param_val_map) {
t1 = strip_whitespaces(p.first);
t2 = strip_whitespaces(p.second);
if(param_map.count(t1)==0) continue;
param_type = param_map[t1];
switch(param_type) {
case DEBUG:
if(t2.compare("TRUE") == 0) {
this->debug = true;
}
else {
this->debug = false;
}
break;
case ANALYZE:
if(t2.compare("TRUE") == 0) {
this->analyze = true;
}
else {
this->analyze = false;
}
break;
case INTERACTIVE_MODE:
if(t2.compare("TRUE") == 0) {
this->interactive_mode = true;
}
else {
this->interactive_mode = false;
}
case TRANSPORT_TYPE:
this->transport_type = transport_type_map[t2];
break;
case NUM_THREADS:
this->num_threads = std::stoi(t2);
break;
case CONN_PORT:
this->conn_port = std::stoi(t2);
break;
case NUM_REPLICAS:
this->num_replicas = std::stoi(t2);
break;
case MAX_PACKET_SIZE_BYTES:
this->max_packet_size_bytes = std::stoi(t2);
break;
case MAX_PACKET_SIZE_MBYTES:
this->max_packet_size_bytes = (int) (std::stoi(t2)*(1<<20));
break;
case RDMA_MR_SIZE_BYTES:
this->rdma_mr_size_bytes = (size_t) (std::stoi(t2));
break;
case RDMA_MR_SIZE_MBYTES:
this->rdma_mr_size_bytes = (size_t) (std::stoi(t2)*(1<<20));
break;
case RDMA_MTU_SIZE_BYTES:
switch(std::stoi(t2)) {
case 256:
this->rdma_mtu_size_bytes = IBV_MTU_256;
break;
case 512:
this->rdma_mtu_size_bytes = IBV_MTU_512;
break;
case 1024:
this->rdma_mtu_size_bytes = IBV_MTU_1024;
break;
case 2048:
this->rdma_mtu_size_bytes = IBV_MTU_2048;
break;
case 4096:
this->rdma_mtu_size_bytes = IBV_MTU_4096;
break;
default:
//error
break;
}
break;
case RDMA_MIN_RNR_TIMER:
this->rdma_min_rnr_timer = std::stoi(t2);
break;
case RDMA_TIMEOUT:
this->rdma_timeout = std::stoi(t2);
break;
case RDMA_CQ_POLL_TIMEOUT_MS:
tmp = std::stod(t2);
tmp *= 1e-3; //convert to ms
this->rdma_cq_poll_timeout_ms = std::chrono::duration<double>{tmp};
break;
case RDMA_RETRY_CNT:
this->rdma_retry_cnt = std::stoi(t2);
break;
case RDMA_IB_PORT:
this->rdma_ib_port = std::stoi(t2);
break;
case RDMA_GID_IDX:
this->rdma_gid_idx = std::stoi(t2);
break;
case ARRIVAL_RATE:
this->arrival_rate = std::stod(t2);
default:
//error
break;
}
}
return;
}
// Reads config file and
// Fills parameter variables
void Params::read_params() {
this->read_config_file();
this->fill_params();
return;
}
#endif
\ No newline at end of file
#ifndef __READ_CONFIG_H__
#define __READ_CONFIG_H__
#include <chrono>
#include <string>
#include <vector>
#include <unordered_map>
#include <infiniband/verbs.h>
#include "config_parameters.hpp"
#include "../transport_api/transport_config.hpp"
// Holds the run-time configuration of a program.
// Typical use: construct with a file name, call read_params(), then read the
// public members.
class Params {
private:
// Path of the config file given to the constructor.
std::string filename;
// NOTE(review): not referenced by the member functions visible in this file.
std::vector<std::string> param_lines;
// Raw KEY -> VALUE text of every KEY=VALUE line read from the file.
std::unordered_map<std::string, std::string> param_val_map;
public:
// Parsed parameter values; see the matching keys in `enum params`.
bool debug;
bool analyze;
bool interactive_mode;
enum Transport_Type transport_type;
int num_threads;
int conn_port;
int num_replicas;
int max_packet_size_bytes;
size_t rdma_mr_size_bytes;
enum ibv_mtu rdma_mtu_size_bytes;
int rdma_min_rnr_timer;
int rdma_timeout;
// Despite the name, this duration type counts seconds.
std::chrono::duration<double> rdma_cq_poll_timeout_ms;
int rdma_retry_cnt;
int rdma_ib_port;
int rdma_gid_idx;
double arrival_rate;
// Default constructor; its definition in read_config.cc has an empty body.
Params();
// Stores the file name `f` and sets every parameter to its default.
Params(std::string f);
//debug functions
void print_map();
void print_vals();
//required functions
// read_params() = read_config_file() followed by fill_params().
void read_params();
void read_config_file();
void fill_params();
};
// NOTE(review): no definition of these two functions is visible alongside
// this header; confirm they are still implemented and used before relying on them.
std::vector<std::string> get_param_lines(char *filename);
enum params check_token(std::string token);
#endif
\ No newline at end of file
#include <iostream>
#include "read_config.hpp"
using namespace std;
int main() {
Params p("config.conf");
p.read_params();
p.print_vals();
return 0;
}
\ No newline at end of file
DEBUG=TRUE
ANALYZE=TRUE
TRANSPORT_TYPE=RDMA_RC
NUM_THREADS=3
CONN_PORT=8888
MAX_PACKET_SIZE_MBYTES=4
RDMA_MR_SIZE_MBYTES=1
RDMA_MTU_SIZE_BYTES=512
RDMA_MIN_RNR_TIMER=12
RDMA_TIMEOUT=12
RDMA_CQ_POLL_TIMEOUT_MS=5
RDMA_RETRY_CNT=4
RDMA_IB_PORT=1
RDMA_GID_IDX=1
\ No newline at end of file
DEBUG=TRUE
ANALYZE=TRUE
INTERACTIVE_MODE=FALSE
TRANSPORT_TYPE=RDMA_RC
NUM_THREADS=0
CONN_PORT=8888
MAX_PACKET_SIZE_MBYTES=4
RDMA_MR_SIZE_MBYTES=1
RDMA_MTU_SIZE_BYTES=512
RDMA_MIN_RNR_TIMER=12
RDMA_TIMEOUT=12
RDMA_CQ_POLL_TIMEOUT_MS=5
RDMA_RETRY_CNT=4
RDMA_IB_PORT=1
RDMA_GID_IDX=1
ARRIVAL_RATE=80
\ No newline at end of file
#include <iostream>
#include <stdio.h>
#include <vector>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string.h>
using namespace std;
// Hard-coded endpoints of the test setup. In this program main() only uses
// NIC_IP and CLIENT_PORT.
// NOTE(review): binding a string literal to a non-const `char*` is deprecated
// in C++; `const char*` would be safer once sock_connect() accepts it.
char* NIC_IP = "192.168.200.21";
char* SERVER_IP = "192.168.200.20";
char* CLIENT_IP = "192.168.200.40";
int SERVER_PORT = 8989;
int CLIENT_PORT = 9898;
/**
 * Writes exactly `size` bytes from `obj` to the descriptor `cfd`.
 *
 * write() may accept fewer bytes than requested; the call is repeated until
 * everything is sent instead of reporting a short write as a failure.
 *
 * @param cfd  descriptor to write to
 * @param obj  first byte to send (const so that literals can be passed)
 * @param size number of bytes to send
 * @return 0 on success, -1 (after printing "write err") if write() fails or
 *         makes no progress
 */
int send_obj(int cfd, const char* obj, int size) {
    int sent = 0;
    while(sent < size) {
        const ssize_t n = write(cfd, obj + sent, size - sent);
        if(n <= 0) {
            std::cout<<"write err"<<std::endl;
            return -1;
        }
        sent += static_cast<int>(n);
    }
    return 0;
}
/**
 * Reads exactly `size` bytes from `cfd` into `obj`.
 *
 * Each read() continues where the previous one stopped and asks only for the
 * bytes still missing. (Previously every retry wrote to the start of `obj`
 * and requested the full size again, which corrupted partially received
 * objects and could consume bytes of the next message.)
 *
 * @return 0 when `size` bytes were received, -1 (after printing "read err")
 *         on end-of-stream or a read error before that
 */
int read_obj(int cfd, char *obj, int size) {
    int received = 0;
    while(received < size) {
        const ssize_t n = read(cfd, obj + received, size - received);
        if(n <= 0) break;   // 0: peer closed, <0: error
        received += static_cast<int>(n);
    }
    if(received < size) {
        std::cout<<"read err"<<std::endl;
        return -1;
    }
    return 0;
}
/**
 * Establishes one TCP connection, as server or as client.
 *
 * server_name == NULL: binds to `port` on all interfaces, listens and blocks
 * until one peer connects; *local_fd receives the listening socket and
 * *conn_fd the accepted connection.
 * server_name != NULL: connects to the dotted-quad IPv4 address
 * `server_name` on `port`; *conn_fd receives the connected socket and
 * *local_fd is set to -1 (there is no listening socket).
 *
 * Every failing step closes the socket, prints the name of the failed call
 * and returns -1; both outputs are then -1.
 *
 * @return 0 on success, -1 on failure
 */
int sock_connect(const char* server_name, int port, int* local_fd, int* conn_fd) {
    // Give the outputs a defined value on every path so callers can close()
    // them safely.
    if(local_fd != NULL) *local_fd = -1;
    if(conn_fd != NULL) *conn_fd = -1;

    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    const int sfd = socket(AF_INET, SOCK_STREAM, 0);
    if(sfd < 0) {
        std::cout<<"sfd:socket"<<std::endl;
        return -1;
    }

    if(server_name == NULL) {
        if(bind(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
            close(sfd);
            std::cout<<"bind"<<std::endl;
            return -1;   // previously fell through and used the closed socket
        }
        if(listen(sfd, 1) < 0) {
            close(sfd);
            std::cout<<"listen"<<std::endl;
            return -1;
        }
        const int cfd = accept(sfd, NULL, NULL);
        if(cfd < 0) {
            close(sfd);
            std::cout<<"accept"<<std::endl;
            return -1;
        }
        *local_fd = sfd;
        *conn_fd = cfd;
        return 0;
    }

    if(inet_aton(server_name, &host_addr.sin_addr) == 0) {
        close(sfd);
        std::cout<<"inet_aton"<<std::endl;
        return -1;
    }
    if(connect(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        close(sfd);
        std::cout<<"connect"<<std::endl;
        return -1;
    }
    *conn_fd = sfd;
    return 0;
}
// Fixed-size record that main() sends over the socket as raw bytes
// (sizeof(struct dummy) per request).
// NOTE(review): the peer must use an identical layout; confirm the
// definitions in the other programs match.
struct dummy {
long long key;   // request number, set by main() before each send
bool valid;      // not set by the code in this file
char val[100];   // payload bytes
};
// Number of requests main() sends.
long long arr_size = 900000;
/**
 * Fills arr[0 .. arr_size) with test records: key = index, val = "HEY".
 * The `valid` field is left untouched.
 *
 * The index is `long long`, matching `arr_size`; the earlier `int` index
 * would overflow for sizes above INT_MAX.
 */
void populate_objs(dummy arr[], long long arr_size) {
    for(long long i=0; i<arr_size; i++) {
        arr[i].key = i;
        arr[i].val[0] = 'H';
        arr[i].val[1] = 'E';
        arr[i].val[2] = 'Y';
        arr[i].val[3] = '\0';
    }
}
/**
 * Benchmark client: connects to NIC_IP:CLIENT_PORT, sends `arr_size`
 * requests one at a time, waits for a one-byte reply to each ('S' counts as
 * success, anything else as error) and prints counts, average round-trip
 * time and throughput.
 *
 * Changes from the earlier version: a failed connect or a failed send/receive
 * stops the run instead of looping on a dead descriptor, the request object
 * is zero-initialised so no uninitialised bytes are sent, the reply buffer is
 * on the stack, and only valid descriptors are closed.
 */
int main() {
    int nic_sfd = -1;
    int nic_cfd = -1;
    if(sock_connect(NIC_IP, CLIENT_PORT, &nic_sfd, &nic_cfd) != 0) {
        std::cout<<"Could not connect to NIC"<<std::endl;
        return 1;
    }

    struct timeval t_time;
    double s1_time, s2_time, e_time;
    double avg_time = 0.0;
    int succ_cnt = 0;
    int err_cnt = 0;
    char reply = 0;
    const int obj_size = sizeof(struct dummy);
    dummy obj;
    memset(&obj, 0, sizeof(obj));

    gettimeofday(&t_time, NULL);
    s2_time = ((double)t_time.tv_sec*1000.0) + ((double)t_time.tv_usec/1000.0);
    for(long long i=0; i<arr_size; i++) {
        gettimeofday(&t_time, NULL);
        s1_time = ((double)t_time.tv_sec*1000.0) + ((double)t_time.tv_usec/1000.0);
        obj.key = i;
        if(send_obj(nic_cfd, (char *)&obj, obj_size) != 0) break;
        if(read_obj(nic_cfd, &reply, 1) != 0) break;
        gettimeofday(&t_time, NULL);
        e_time = ((double)t_time.tv_sec*1000.0) +((double)t_time.tv_usec/1000.0);
        avg_time += (e_time - s1_time);
        if(reply=='S') succ_cnt++;
        else err_cnt++;
    }
    gettimeofday(&t_time, NULL);
    e_time = ((double)t_time.tv_sec*1000.0) + ((double)t_time.tv_usec/1000.0);
    const double overall_time = (e_time - s2_time)/1000.0;   // seconds

    const int total = succ_cnt + err_cnt;
    if(total > 0) avg_time = avg_time/total;   // avoid 0/0 when nothing completed

    std::cout<<"Success: "<<succ_cnt<<std::endl;
    std::cout<<"Errored: "<<err_cnt<<std::endl;
    std::cout<<"Avg. RTT: "<<avg_time<<" ms"<<std::endl;
    std::cout<<"Overall Time: "<<overall_time<<std::endl;
    std::cout<<"Througput: "<<total/overall_time<<std::endl;
    std::cout<<"Closing connections"<<std::endl;
    close(nic_cfd);
    if(nic_sfd >= 0) close(nic_sfd);
    return 0;
}
#include <iostream>
#include <stdio.h>
#include <vector>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string.h>
using namespace std;
// Hard-coded endpoints of the test setup. In this program main() only uses
// SERVER_PORT.
// NOTE(review): binding a string literal to a non-const `char*` is deprecated
// in C++; `const char*` would be safer once sock_connect() accepts it.
char* NIC_IP = "192.168.200.21";
char* SERVER_IP = "192.168.200.20";
char* CLIENT_IP = "192.168.200.40";
int SERVER_PORT = 8989;
int CLIENT_PORT = 9898;
/**
 * Writes exactly `size` bytes from `obj` to the descriptor `cfd`.
 *
 * write() may accept fewer bytes than requested; the call is repeated until
 * everything is sent instead of reporting a short write as a failure.
 *
 * @param cfd  descriptor to write to
 * @param obj  first byte to send (const so that literals can be passed)
 * @param size number of bytes to send
 * @return 0 on success, -1 (after printing "write err") if write() fails or
 *         makes no progress
 */
int send_obj(int cfd, const char* obj, int size) {
    int sent = 0;
    while(sent < size) {
        const ssize_t n = write(cfd, obj + sent, size - sent);
        if(n <= 0) {
            std::cout<<"write err"<<std::endl;
            return -1;
        }
        sent += static_cast<int>(n);
    }
    return 0;
}
/**
 * Reads exactly `size` bytes from `cfd` into `obj`.
 *
 * Each read() continues where the previous one stopped and asks only for the
 * bytes still missing. (Previously every retry wrote to the start of `obj`
 * and requested the full size again, which corrupted partially received
 * objects and could consume bytes of the next message.)
 *
 * @return 0 when `size` bytes were received, -1 (after printing "read err")
 *         on end-of-stream or a read error before that
 */
int read_obj(int cfd, char *obj, int size) {
    int received = 0;
    while(received < size) {
        const ssize_t n = read(cfd, obj + received, size - received);
        if(n <= 0) break;   // 0: peer closed, <0: error
        received += static_cast<int>(n);
    }
    if(received < size) {
        std::cout<<"read err"<<std::endl;
        return -1;
    }
    return 0;
}
/**
 * Establishes one TCP connection, as server or as client.
 *
 * server_name == NULL: binds to `port` on all interfaces, listens and blocks
 * until one peer connects; *local_fd receives the listening socket and
 * *conn_fd the accepted connection.
 * server_name != NULL: connects to the dotted-quad IPv4 address
 * `server_name` on `port`; *conn_fd receives the connected socket and
 * *local_fd is set to -1 (there is no listening socket).
 *
 * Every failing step closes the socket, prints the name of the failed call
 * and returns -1; both outputs are then -1.
 *
 * @return 0 on success, -1 on failure
 */
int sock_connect(const char* server_name, int port, int* local_fd, int* conn_fd) {
    // Give the outputs a defined value on every path so callers can close()
    // them safely.
    if(local_fd != NULL) *local_fd = -1;
    if(conn_fd != NULL) *conn_fd = -1;

    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    const int sfd = socket(AF_INET, SOCK_STREAM, 0);
    if(sfd < 0) {
        std::cout<<"sfd:socket"<<std::endl;
        return -1;
    }

    if(server_name == NULL) {
        if(bind(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
            close(sfd);
            std::cout<<"bind"<<std::endl;
            return -1;   // previously fell through and used the closed socket
        }
        if(listen(sfd, 1) < 0) {
            close(sfd);
            std::cout<<"listen"<<std::endl;
            return -1;
        }
        const int cfd = accept(sfd, NULL, NULL);
        if(cfd < 0) {
            close(sfd);
            std::cout<<"accept"<<std::endl;
            return -1;
        }
        *local_fd = sfd;
        *conn_fd = cfd;
        return 0;
    }

    if(inet_aton(server_name, &host_addr.sin_addr) == 0) {
        close(sfd);
        std::cout<<"inet_aton"<<std::endl;
        return -1;
    }
    if(connect(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        close(sfd);
        std::cout<<"connect"<<std::endl;
        return -1;
    }
    *conn_fd = sfd;
    return 0;
}
// Fixed-size record that main() receives from the socket as raw bytes
// (sizeof(dummy) per request).
// NOTE(review): the sender must use an identical layout; confirm the
// definitions in the other programs match.
struct dummy {
long long key;
bool valid;      // overwritten by main() with a random draw after each receive
char val[100];
};
// Fills arr[0 .. arr_size): key = index, and valid = false whenever a
// rand()-based draw in [0, 1] is <= err_fraction (true otherwise).
void populate_objs(dummy arr[], long long arr_size, double err_fraction) {
    long long idx = 0;
    while(idx < arr_size) {
        dummy& slot = arr[idx];
        slot.key = idx;
        const double draw = (double)rand()/RAND_MAX;
        slot.valid = !(draw <= err_fraction);
        ++idx;
    }
}
// Stand-in for request processing: increments a local counter 1000 times and
// discards the result.
void dummy_function() {
    int counter = 0;
    int step = 0;
    while(step < 1000) {
        counter++;
        step++;
    }
}
// Threshold for the random draw in main(): a request is marked invalid when
// the draw is <= this value.
double err_fraction = 0.0;
// Number of requests main() expects to serve.
long long arr_size = 1000000;
/**
 * Benchmark server: accepts one connection on SERVER_PORT and serves up to
 * `arr_size` requests. Each request is a raw `dummy` object; it is randomly
 * marked invalid with probability ~err_fraction and answered with one byte,
 * 'S' (after dummy_function()) or 'E'. Prints the throughput at the end.
 *
 * Changes from the earlier version: a failed accept or a failed
 * receive/send stops the run, the reply bytes live in writable arrays
 * instead of string literals bound to `char*`, and the accepted socket is
 * closed as well as the listening one.
 */
int main() {
    int client_sfd = -1;
    int client_cfd = -1;
    if(sock_connect(NULL, SERVER_PORT, &client_sfd, &client_cfd) != 0 || client_cfd < 0) {
        std::cout<<"Could not accept client connection"<<std::endl;
        return 1;
    }

    dummy obj;
    const int obj_size = sizeof(obj);
    char s[] = "S";
    char e[] = "E";
    int succ_cnt = 0;
    int err_cnt = 0;
    struct timeval t_time;
    double s_time, e_time, overall_time;

    gettimeofday(&t_time, NULL);
    s_time = ((double)t_time.tv_sec*1000.0) + ((double)t_time.tv_usec/1000.0);
    for(long long i=0; i<arr_size; i++) {
        if(read_obj(client_cfd, (char*)&obj, obj_size) != 0) break;
        obj.valid = !((double)rand()/RAND_MAX <= err_fraction);
        if(obj.valid) {
            succ_cnt++;
            dummy_function();
            if(send_obj(client_cfd, s, 1) != 0) break;
        }
        else {
            err_cnt++;
            if(send_obj(client_cfd, e, 1) != 0) break;
        }
    }
    gettimeofday(&t_time, NULL);
    e_time = ((double)t_time.tv_sec*1000.0) + ((double)t_time.tv_usec/1000.0);
    overall_time = (e_time-s_time)/1000.0;   // seconds

    std::cout<<"Throughput: "<<(succ_cnt+err_cnt)/overall_time<<std::endl;
    std::cout<<"Closing connections"<<std::endl;
    close(client_cfd);
    close(client_sfd);
    return 0;
}
#include <iostream>
#include <stdio.h>
#include <vector>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string.h>
using namespace std;
// Hard-coded endpoints of the test setup. In this program main() only uses
// SERVER_PORT.
// NOTE(review): binding a string literal to a non-const `char*` is deprecated
// in C++; `const char*` would be safer once sock_connect() accepts it.
char* NIC_IP = "192.168.200.21";
char* SERVER_IP = "192.168.200.20";
char* CLIENT_IP = "192.168.200.40";
int SERVER_PORT = 8989;
int CLIENT_PORT = 9898;
/**
 * Writes exactly `size` bytes from `obj` to the descriptor `cfd`.
 *
 * write() may accept fewer bytes than requested; the call is repeated until
 * everything is sent instead of reporting a short write as a failure.
 *
 * @param cfd  descriptor to write to
 * @param obj  first byte to send (const so that literals can be passed)
 * @param size number of bytes to send
 * @return 0 on success, -1 (after printing "write err") if write() fails or
 *         makes no progress
 */
int send_obj(int cfd, const char* obj, int size) {
    int sent = 0;
    while(sent < size) {
        const ssize_t n = write(cfd, obj + sent, size - sent);
        if(n <= 0) {
            std::cout<<"write err"<<std::endl;
            return -1;
        }
        sent += static_cast<int>(n);
    }
    return 0;
}
/**
 * Reads exactly `size` bytes from `cfd` into `obj`.
 *
 * Each read() continues where the previous one stopped and asks only for the
 * bytes still missing. (Previously every retry wrote to the start of `obj`
 * and requested the full size again, which corrupted partially received
 * objects and could consume bytes of the next message.)
 *
 * @return 0 when `size` bytes were received, -1 (after printing "read err")
 *         on end-of-stream or a read error before that
 */
int read_obj(int cfd, char *obj, int size) {
    int received = 0;
    while(received < size) {
        const ssize_t n = read(cfd, obj + received, size - received);
        if(n <= 0) break;   // 0: peer closed, <0: error
        received += static_cast<int>(n);
    }
    if(received < size) {
        std::cout<<"read err"<<std::endl;
        return -1;
    }
    return 0;
}
/**
 * Establishes one TCP connection, as server or as client.
 *
 * server_name == NULL: binds to `port` on all interfaces, listens and blocks
 * until one peer connects; *local_fd receives the listening socket and
 * *conn_fd the accepted connection.
 * server_name != NULL: connects to the dotted-quad IPv4 address
 * `server_name` on `port`; *conn_fd receives the connected socket and
 * *local_fd is set to -1 (there is no listening socket).
 *
 * Every failing step closes the socket, prints the name of the failed call
 * and returns -1; both outputs are then -1.
 *
 * @return 0 on success, -1 on failure
 */
int sock_connect(const char* server_name, int port, int* local_fd, int* conn_fd) {
    // Give the outputs a defined value on every path so callers can close()
    // them safely.
    if(local_fd != NULL) *local_fd = -1;
    if(conn_fd != NULL) *conn_fd = -1;

    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    const int sfd = socket(AF_INET, SOCK_STREAM, 0);
    if(sfd < 0) {
        std::cout<<"sfd:socket"<<std::endl;
        return -1;
    }

    if(server_name == NULL) {
        if(bind(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
            close(sfd);
            std::cout<<"bind"<<std::endl;
            return -1;   // previously fell through and used the closed socket
        }
        if(listen(sfd, 1) < 0) {
            close(sfd);
            std::cout<<"listen"<<std::endl;
            return -1;
        }
        const int cfd = accept(sfd, NULL, NULL);
        if(cfd < 0) {
            close(sfd);
            std::cout<<"accept"<<std::endl;
            return -1;
        }
        *local_fd = sfd;
        *conn_fd = cfd;
        return 0;
    }

    if(inet_aton(server_name, &host_addr.sin_addr) == 0) {
        close(sfd);
        std::cout<<"inet_aton"<<std::endl;
        return -1;
    }
    if(connect(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        close(sfd);
        std::cout<<"connect"<<std::endl;
        return -1;
    }
    *conn_fd = sfd;
    return 0;
}
// Fixed-size record that main() receives from the socket as raw bytes
// (sizeof(dummy) per request).
// NOTE(review): `key` is an `int` here; a peer whose struct uses a different
// key type would send objects of a different size -- confirm the layouts match.
struct dummy {
int key;         // used by main() as an index into its local table
bool valid;
char val[100];
};
// Fills arr[0 .. arr_size): key = index, and valid = false whenever a
// rand()-based draw in [0, 1] is <= err_fraction (true otherwise).
void populate_objs(dummy arr[], int arr_size, double err_fraction) {
    int idx = 0;
    while(idx < arr_size) {
        dummy& slot = arr[idx];
        slot.key = idx;
        const double draw = (double)rand()/RAND_MAX;
        slot.valid = !(draw <= err_fraction);
        ++idx;
    }
}
// Stand-in for request processing: increments a local counter 1000 times and
// discards the result.
void dummy_function() {
    int counter = 0;
    int step = 0;
    while(step < 1000) {
        counter++;
        step++;
    }
}
// Passed to populate_objs() as the threshold for marking an entry invalid.
double err_fraction = 0.0;
// Size of the lookup table built in main() and number of requests it serves.
int arr_size = 1000;
int main() {
dummy arr[arr_size];
populate_objs(arr, arr_size, err_fraction);
int client_sfd, client_cfd;
sock_connect(NULL, SERVER_PORT, &client_sfd, &client_cfd);
dummy obj;
int obj_size = sizeof(obj);
char *s = "S";
char *e = "E";
int succ_cnt = 0;
int err_cnt = 0;
for(int i=0; i<arr_size; i++) {
read_obj(client_cfd, (char*)&obj, obj_size);
if(arr[obj.key].valid) {
succ_cnt++;
dummy_function();
send_obj(client_cfd, s, 1);
}
else {
err_cnt++;
send_obj(client_cfd, e, 1);
}
}
cout<<"Closing connections"<<endl;
//close(nic_cfd);
close(client_sfd);
return 0;
}
#include <iostream>
#include <stdio.h>
#include <vector>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string.h>
using namespace std;
// Hard-coded endpoints of the test setup. In this program main() uses
// SERVER_IP, SERVER_PORT and CLIENT_PORT.
// NOTE(review): binding a string literal to a non-const `char*` is deprecated
// in C++; `const char*` would be safer once sock_connect() accepts it.
char* NIC_IP = "192.168.200.21";
char* SERVER_IP = "192.168.200.20";
char* CLIENT_IP = "192.168.200.40";
int SERVER_PORT = 8989;
int CLIENT_PORT = 9898;
/**
 * Writes exactly `size` bytes from `obj` to the descriptor `cfd`.
 *
 * write() may accept fewer bytes than requested; the call is repeated until
 * everything is sent instead of reporting a short write as a failure.
 *
 * @param cfd  descriptor to write to
 * @param obj  first byte to send (const so that literals can be passed)
 * @param size number of bytes to send
 * @return 0 on success, -1 (after printing "write err") if write() fails or
 *         makes no progress
 */
int send_obj(int cfd, const char* obj, int size) {
    int sent = 0;
    while(sent < size) {
        const ssize_t n = write(cfd, obj + sent, size - sent);
        if(n <= 0) {
            std::cout<<"write err"<<std::endl;
            return -1;
        }
        sent += static_cast<int>(n);
    }
    return 0;
}
/**
 * Reads exactly `size` bytes from `cfd` into `obj`.
 *
 * Each read() continues where the previous one stopped and asks only for the
 * bytes still missing. (Previously every retry wrote to the start of `obj`
 * and requested the full size again, which corrupted partially received
 * objects and could consume bytes of the next message.)
 *
 * @return 0 when `size` bytes were received, -1 (after printing "read err")
 *         on end-of-stream or a read error before that
 */
int read_obj(int cfd, char *obj, int size) {
    int received = 0;
    while(received < size) {
        const ssize_t n = read(cfd, obj + received, size - received);
        if(n <= 0) break;   // 0: peer closed, <0: error
        received += static_cast<int>(n);
    }
    if(received < size) {
        std::cout<<"read err"<<std::endl;
        return -1;
    }
    return 0;
}
/**
 * Establishes one TCP connection, as server or as client.
 *
 * server_name == NULL: binds to `port` on all interfaces, listens and blocks
 * until one peer connects; *local_fd receives the listening socket and
 * *conn_fd the accepted connection.
 * server_name != NULL: connects to the dotted-quad IPv4 address
 * `server_name` on `port`; *conn_fd receives the connected socket and
 * *local_fd is set to -1 (there is no listening socket).
 *
 * Every failing step closes the socket, prints the name of the failed call
 * and returns -1; both outputs are then -1.
 *
 * @return 0 on success, -1 on failure
 */
int sock_connect(const char* server_name, int port, int* local_fd, int* conn_fd) {
    // Give the outputs a defined value on every path so callers can close()
    // them safely.
    if(local_fd != NULL) *local_fd = -1;
    if(conn_fd != NULL) *conn_fd = -1;

    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    const int sfd = socket(AF_INET, SOCK_STREAM, 0);
    if(sfd < 0) {
        std::cout<<"sfd:socket"<<std::endl;
        return -1;
    }

    if(server_name == NULL) {
        if(bind(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
            close(sfd);
            std::cout<<"bind"<<std::endl;
            return -1;   // previously fell through and used the closed socket
        }
        if(listen(sfd, 1) < 0) {
            close(sfd);
            std::cout<<"listen"<<std::endl;
            return -1;
        }
        const int cfd = accept(sfd, NULL, NULL);
        if(cfd < 0) {
            close(sfd);
            std::cout<<"accept"<<std::endl;
            return -1;
        }
        *local_fd = sfd;
        *conn_fd = cfd;
        return 0;
    }

    if(inet_aton(server_name, &host_addr.sin_addr) == 0) {
        close(sfd);
        std::cout<<"inet_aton"<<std::endl;
        return -1;
    }
    if(connect(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        close(sfd);
        std::cout<<"connect"<<std::endl;
        return -1;
    }
    *conn_fd = sfd;
    return 0;
}
// Fixed-size record that main() receives from the client socket and forwards
// to the server socket as raw bytes (sizeof(dummy) per request).
// NOTE(review): both peers must use an identical layout; confirm the
// definitions in the other programs match.
struct dummy {
long long key;
bool valid;      // overwritten by main() with a random draw after each receive
char val[100];   // main() sets val[0] = 'D' in the final object it sends to the server
};
// Fills arr[0 .. arr_size): key = index, and valid = false whenever a
// rand()-based draw in [0, 1] is <= err_fraction (true otherwise).
void populate_objs(dummy arr[], int arr_size, double err_fraction) {
    int idx = 0;
    while(idx < arr_size) {
        dummy& slot = arr[idx];
        slot.key = idx;
        const double draw = (double)rand()/RAND_MAX;
        slot.valid = !(draw <= err_fraction);
        ++idx;
    }
}
// Threshold for the random draw in main(): a request is rejected locally when
// the draw is <= this value.
double err_fraction = 0.6;
// Upper bound on the number of requests main() handles.
long long arr_size = 9000000;
int main() {
//dummy obj[arr_size];
srand(time(NULL));
//populate_objs(obj, arr_size, err_fraction);
int server_sfd, server_cfd, client_sfd, client_cfd;
sock_connect(SERVER_IP, SERVER_PORT, &server_sfd, &server_cfd);
cout<<"Connected to server"<<endl;
sock_connect(NULL, CLIENT_PORT, &client_sfd, &client_cfd);
cout<<"Connected to client"<<endl;
double s_time, e_time, avg_time;
struct timeval temp_time;
cout<<"Connected to server and client"<<endl;
dummy t_obj;
int succ_cnt = 0;
int err_cnt = 0;
int obj_size = sizeof(t_obj);
char *temp_char = (char*)malloc(1);
char *s = "S";
char *e = "E";
memset((void*)&t_obj, 0, obj_size);
for(long long i=0; i<arr_size; i++) {
//cout<<i<<endl;
read_obj(client_cfd, (char*)&t_obj, obj_size);
if((double)rand()/RAND_MAX <= err_fraction) t_obj.valid = false;
else t_obj.valid = true;
if(t_obj.valid) {
succ_cnt++;
//cout<<t_obj.val<<endl;
//dummy_function();
send_obj(server_cfd, (char*)&t_obj, obj_size);
//cout<<"obj_sent"<<endl;
read_obj(server_cfd, temp_char, 1);
send_obj(client_cfd, s, 1);
}
else {
err_cnt++;
send_obj(client_cfd, e, 1);
}
}
cout<<"Error Fraction: "<<err_fraction<<endl;
cout<<"Succcessful: "<<succ_cnt<<endl;
cout<<"Errored: "<<err_cnt<<endl;
cout<<"....Closing connections...."<<endl;
t_obj.val[0]='D';
send_obj(server_cfd, (char*)&t_obj, obj_size);
close(client_cfd);
close(server_cfd);
close(client_sfd);
close(server_sfd);
return 0;
}
#include <iostream>
#include <stdio.h>
#include <vector>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string.h>
using namespace std;
// Hard-coded endpoints of the test setup. In this program main() only uses
// SERVER_PORT.
// NOTE(review): binding a string literal to a non-const `char*` is deprecated
// in C++; `const char*` would be safer once sock_connect() accepts it.
char* NIC_IP = "192.168.200.21";
char* SERVER_IP = "192.168.200.20";
char* CLIENT_IP = "192.168.200.40";
int SERVER_PORT = 8989;
int CLIENT_PORT = 9898;
/**
 * Writes exactly `size` bytes from `obj` to the descriptor `cfd`.
 *
 * write() may accept fewer bytes than requested; the call is repeated until
 * everything is sent instead of reporting a short write as a failure.
 *
 * @param cfd  descriptor to write to
 * @param obj  first byte to send (const so that literals can be passed)
 * @param size number of bytes to send
 * @return 0 on success, -1 (after printing "write err") if write() fails or
 *         makes no progress
 */
int send_obj(int cfd, const char* obj, int size) {
    int sent = 0;
    while(sent < size) {
        const ssize_t n = write(cfd, obj + sent, size - sent);
        if(n <= 0) {
            std::cout<<"write err"<<std::endl;
            return -1;
        }
        sent += static_cast<int>(n);
    }
    return 0;
}
/**
 * Reads exactly `size` bytes from `cfd` into `obj`.
 *
 * Each read() continues where the previous one stopped and asks only for the
 * bytes still missing. (Previously every retry wrote to the start of `obj`
 * and requested the full size again, which corrupted partially received
 * objects and could consume bytes of the next message.)
 *
 * @return 0 when `size` bytes were received, -1 (after printing "read err")
 *         on end-of-stream or a read error before that
 */
int read_obj(int cfd, char *obj, int size) {
    int received = 0;
    while(received < size) {
        const ssize_t n = read(cfd, obj + received, size - received);
        if(n <= 0) break;   // 0: peer closed, <0: error
        received += static_cast<int>(n);
    }
    if(received < size) {
        std::cout<<"read err"<<std::endl;
        return -1;
    }
    return 0;
}
/**
 * Establishes one TCP connection, as server or as client.
 *
 * server_name == NULL: binds to `port` on all interfaces, listens and blocks
 * until one peer connects; *local_fd receives the listening socket and
 * *conn_fd the accepted connection.
 * server_name != NULL: connects to the dotted-quad IPv4 address
 * `server_name` on `port`; *conn_fd receives the connected socket and
 * *local_fd is set to -1 (there is no listening socket).
 *
 * Every failing step closes the socket, prints the name of the failed call
 * and returns -1; both outputs are then -1.
 *
 * @return 0 on success, -1 on failure
 */
int sock_connect(const char* server_name, int port, int* local_fd, int* conn_fd) {
    // Give the outputs a defined value on every path so callers can close()
    // them safely.
    if(local_fd != NULL) *local_fd = -1;
    if(conn_fd != NULL) *conn_fd = -1;

    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    const int sfd = socket(AF_INET, SOCK_STREAM, 0);
    if(sfd < 0) {
        std::cout<<"sfd:socket"<<std::endl;
        return -1;
    }

    if(server_name == NULL) {
        if(bind(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
            close(sfd);
            std::cout<<"bind"<<std::endl;
            return -1;   // previously fell through and used the closed socket
        }
        if(listen(sfd, 1) < 0) {
            close(sfd);
            std::cout<<"listen"<<std::endl;
            return -1;
        }
        const int cfd = accept(sfd, NULL, NULL);
        if(cfd < 0) {
            close(sfd);
            std::cout<<"accept"<<std::endl;
            return -1;
        }
        *local_fd = sfd;
        *conn_fd = cfd;
        return 0;
    }

    if(inet_aton(server_name, &host_addr.sin_addr) == 0) {
        close(sfd);
        std::cout<<"inet_aton"<<std::endl;
        return -1;
    }
    if(connect(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        close(sfd);
        std::cout<<"connect"<<std::endl;
        return -1;
    }
    *conn_fd = sfd;
    return 0;
}
// Fixed-size record that main() receives from the socket as raw bytes
// (sizeof(dummy) per request).
// NOTE(review): the sender must use an identical layout; confirm the
// definitions in the other programs match.
struct dummy {
long long key;
bool valid;
char val[100];   // main() stops serving when val[0] == 'D'
};
// Fills arr[0 .. arr_size): key = index, and valid = false whenever a
// rand()-based draw in [0, 1] is <= err_fraction (true otherwise).
void populate_objs(dummy arr[], long long arr_size, double err_fraction) {
    long long idx = 0;
    while(idx < arr_size) {
        dummy& slot = arr[idx];
        slot.key = idx;
        const double draw = (double)rand()/RAND_MAX;
        slot.valid = !(draw <= err_fraction);
        ++idx;
    }
}
// Stand-in for request processing: increments a local counter 1000 times and
// discards the result.
void dummy_function() {
    int counter = 0;
    int step = 0;
    while(step < 1000) {
        counter++;
        step++;
    }
}
// Only referenced from commented-out code in main().
double err_fraction = 0.0;
// Not read by main(), which loops until the termination marker arrives.
long long arr_size = 900000;
/**
 * Server behind the NIC relay: accepts one connection on SERVER_PORT and
 * serves requests until an object with val[0] == 'D' arrives. Every request
 * runs dummy_function() and is acknowledged with one byte. Prints the
 * request count, the accumulated processing time and the resulting
 * throughput.
 *
 * Changes from the earlier version: the acknowledgement byte is a defined
 * 'S' (it used to be an uninitialised malloc'd byte), a failed receive or
 * send ends the loop (previously a closed connection made the loop spin
 * forever), the accepted socket is closed, and unused variables are gone.
 */
int main() {
    int nic_sfd = -1;
    int nic_cfd = -1;
    if(sock_connect(NULL, SERVER_PORT, &nic_sfd, &nic_cfd) != 0 || nic_cfd < 0) {
        std::cout<<"Could not accept NIC connection"<<std::endl;
        return 1;
    }

    dummy obj;
    const int obj_size = sizeof(obj);
    char ack[] = "S";
    int req_cnt = 0;
    struct timeval t_time;
    double s1_time, e_time;
    double overall_time1 = 0.0;   // summed per-request processing time, ms

    while(true) {
        if(read_obj(nic_cfd, (char*)&obj, obj_size) != 0) break;
        if(obj.val[0]=='D') break;   // termination marker from the peer
        gettimeofday(&t_time, NULL);
        s1_time = ((double)t_time.tv_sec*1000.0) + ((double)t_time.tv_usec/1000.0);
        req_cnt++;
        dummy_function();
        if(send_obj(nic_cfd, ack, 1) != 0) break;
        gettimeofday(&t_time, NULL);
        e_time = ((double)t_time.tv_sec*1000.0) + ((double)t_time.tv_usec/1000.0);
        overall_time1 += (e_time-s1_time);
    }

    overall_time1 = overall_time1/1000.0;   // ms -> s
    std::cout<<"Req count: "<<req_cnt<<std::endl;
    std::cout<<"Processing time: "<<overall_time1<<"s"<<std::endl;
    std::cout<<"Throughput: "<<(req_cnt)/overall_time1<<std::endl;
    std::cout<<"Closing connections"<<std::endl;
    close(nic_cfd);
    close(nic_sfd);
    return 0;
}
#include <unistd.h>
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <thread>
#include <vector>
#include <mutex>
#include <chrono>
#include <random>
#include <unordered_map>
#include <utility>
#include <inttypes.h>
#define PORT 9095
// Unit multipliers. The composite ones are parenthesised so that they expand
// correctly inside larger expressions (e.g. `x / MICRO`, `n % M`).
#define MILLIS 1000
#define MICRO (MILLIS * 1000)
#define MAX_TARGET 4
#define K 1024
#define M (K * 1024)
#define G (M * 1024)
// in order to turn on debugging, compile with -DDEBUG flag
#ifdef DEBUG
#define D(x) (x)
#else
#define D(x) do{}while(0)
#endif
// File-scope state shared with main() below.
using namespace std;
// NOTE(review): `m` is not referenced by main() in this file — confirm it is still needed.
unordered_map<string, uint64_t> m;
// Keyed by "<tableId>$<key>" (see main()); the mapped value is the object's current version.
unordered_map<string, uint64_t> master_data; // This hashmap emulates the master's DRAM
// Descriptor set rebuilt by main() before every select() call.
fd_set readfds;
// unordered_map<string, uint64_t> m;
// NOTE(review): `latv` is not referenced by main() in this file — confirm it is still needed.
vector<uint64_t> latv; // vector to store the difference in timestamps for each packet
// Time since epoch in microseconds is typecast to uint64_t
/**
* This enum provides symbolic names for the status values returned
* to applications by RAMCloud operations.
*
* 0 means success; anything else means that an error occurred.
* Not all status values can be returned by all operations.
*/
// Unscoped enum with explicit numeric values. ResponseCommon embeds a Status
// directly, so these numbers are what a response struct carries.
typedef enum Status {
/// Default return value when an operation was successful.
STATUS_OK = 0,
/// Indicates that the server does not know about (and is not responsible
/// for) a given tablet, but that it may exist elsewhere in the system.
/// When it's possible that the tablet exists on another server, this
/// status should be returned (in preference to the definitive
/// TABLE_DOESNT_EXIST).
STATUS_UNKNOWN_TABLET = 1,
/// Indicates that a table does not exist anywhere in the system. At present
/// only the coordinator can say with certainty that a table does not exist.
STATUS_TABLE_DOESNT_EXIST = 2,
/// Indicates that an object does not exist anywhere in the system. Note
/// that unlike with tables there is no UNKNOWN_OBJECT status. This is just
/// because servers will reject operations on objects in unknown tables with
/// a table-related status. If they own a particular tablet, then they can
/// say with certainty if an object exists there or not.
/// In this file main() replies with it when a write that sets
/// rejectRules.versionNeGiven names a key that is absent from master_data.
STATUS_OBJECT_DOESNT_EXIST = 3,
STATUS_OBJECT_EXISTS = 4,
/// In this file main() replies with it when rejectRules.versionNeGiven is
/// set and the stored version differs from rejectRules.givenVersion.
STATUS_WRONG_VERSION = 5,
STATUS_NO_TABLE_SPACE = 6,
STATUS_MESSAGE_TOO_SHORT = 7,
STATUS_UNIMPLEMENTED_REQUEST = 8,
STATUS_REQUEST_FORMAT_ERROR = 9,
STATUS_RESPONSE_FORMAT_ERROR = 10,
STATUS_COULDNT_CONNECT = 11,
STATUS_BACKUP_BAD_SEGMENT_ID = 12,
/// Returned by backups when they cannot (or do not wish to) allocate
/// space for a segment replica.
STATUS_BACKUP_OPEN_REJECTED = 13,
STATUS_BACKUP_SEGMENT_OVERFLOW = 14,
STATUS_BACKUP_MALFORMED_SEGMENT = 15,
STATUS_SEGMENT_RECOVERY_FAILED = 16,
/// Indicates that a server is not prepared to handle a request at
/// the present time; the caller should retry at a later time. This
/// status can be returned under many different situations, such as
/// (a) the server is out of resources to execute the request, or
/// (b) the server is not sure it actually has authority to execute
/// the request, and is checking with the coordinator.
STATUS_RETRY = 17,
/// Indicates that the RPC requested an unknown service.
STATUS_SERVICE_NOT_AVAILABLE = 18,
STATUS_TIMEOUT = 19,
/// Indicates that server to which an RPC is directed either never existed,
/// has come and gone, or is currently in crashed state. The server is not
/// in a position to respond to RPCs and probably never will be again
/// (unless the id hasn't yet existed; once a server crashes its id will
/// never be reused).
STATUS_SERVER_NOT_UP = 20,
STATUS_INTERNAL_ERROR = 21,
/// Indicates that the object chosen for an operation does not match the
/// associated requirements. Therefore the chosen object is invalid.
STATUS_INVALID_OBJECT = 22,
/// Indicates that a tablet does not exist. This status is relevant
/// when split or merge operations on tablets are executed.
STATUS_TABLET_DOESNT_EXIST = 23,
/// Indicates that the logic to partition tablets was invoked without a
/// preceding invocation to start reading replicas off of disk.
STATUS_PARTITION_BEFORE_READ = 24,
/// Indicates that an RPC was intended for a particular server id, but
/// was actually sent to a different server id.
STATUS_WRONG_SERVER = 25,
/// Indicates that the server sending an RPC is not present in the
/// server list of the RPC recipient. Used to help servers discover
/// that they are zombies (the rest of the cluster thinks a zombie
/// is dead, but the zombie thinks it's still alive), so they don't
/// continue servicing requests when other servers have already
/// taken over their tablets. See "Zombies" in designNotes.
STATUS_CALLER_NOT_IN_CLUSTER = 26,
/// Indicates that a single request was too big to fit in an rpc and
/// thus could not be sent/carried out.
STATUS_REQUEST_TOO_LARGE = 27,
/// Indicates that the server does not know about (and is not responsible
/// for) a given indexlet, but that it may exist elsewhere in the system.
/// When it's possible that the indexlet exists on another server, this
/// status should be returned (in preference to the definitive
/// INDEX_DOESNT_EXIST).
STATUS_UNKNOWN_INDEXLET = 28,
/// Indicates that an index does not exist anywhere in the system. At
/// present only the coordinator can say with certainty that an index does
/// not exist.
STATUS_INDEX_DOESNT_EXIST = 29,
/// Indicates that a parameter provided by the client is invalid (for
/// example: it is outside allowed bounds).
STATUS_INVALID_PARAMETER = 30,
/// Indicates that client already received the result of the rpc.
/// It does not make sense to execute the RPC again. Most likely cause
/// is a delayed network packet.
STATUS_STALE_RPC = 31,
/// Indicates that the lease of a client is expired on the coordinator.
/// Master refused to execute the RPC with expired lease.
STATUS_EXPIRED_LEASE = 32,
/// Indicates that a client tried to perform transaction operations after
/// the transaction commit had already started.
STATUS_TX_OP_AFTER_COMMIT = 33,
/// Has the same numeric value as STATUS_TX_OP_AFTER_COMMIT, so the two
/// enumerators compare equal. In this file main() also sends it as the
/// status of the reply to any non-WRITE opcode before leaving its loop.
STATUS_MAX_VALUE = 33,
// NOTE(review): the files named in the checklist below (Status.cc,
// ClientException.h, the Java bindings) are not part of this file — confirm
// whether the checklist still applies here.
// Note: if you add a new status value you must make the following
// additional updates:
// * Modify STATUS_MAX_VALUE to have a value equal to the largest
// defined status value, and make sure its definition is the last one
// in the list. STATUS_MAX_VALUE is used primarily for testing.
// * Add new entries in the tables "messages" and "symbols" in Status.cc.
// * Add a new exception class to ClientException.h
// * Add a new "case" to ClientException::throwException to map from
// the status value to a status-specific ClientException subclass.
// * In the Java bindings, add a static class for the exception to
// ClientException.java
// * Add a case for the status of the exception to throw the exception in
// ClientException.java
// * Add the exception to the Status enum in Status.java, making
// sure the status is in the correct position corresponding to its status
// code.
} Status;
/**
* Used in conditional operations to specify conditions under
* which an operation should be aborted with an error.
*
* RejectRules are typically used to ensure consistency of updates;
* for example, we might want to update a value but only if it hasn't
* changed since the last time we read it. If a RejectRules object
* is passed to an operation, the operation will be aborted if any
* of the following conditions are satisfied:
* - doesntExist is nonzero and the object does not exist
* - exists is nonzero and the object does exist
* - versionLeGiven is nonzero and the object exists with a version
* less than or equal to givenVersion.
* - versionNeGiven is nonzero and the object exists with a version
* different from givenVersion.
*/
// Packed: one uint64_t followed by four uint8_t flags, 12 bytes with no padding.
struct RejectRules {
uint64_t givenVersion; // Version that versionLeGiven / versionNeGiven compare against.
uint8_t doesntExist; // Nonzero: abort if the object does not exist.
uint8_t exists; // Nonzero: abort if the object does exist.
uint8_t versionLeGiven; // Nonzero: abort if the object's version <= givenVersion.
uint8_t versionNeGiven; // Nonzero: abort if the object's version != givenVersion.
} __attribute__((packed));
/**
* Selects the particular service that will handle a given rpc.
* A rpc may only be sent to one particular service; see ServiceMask for
* situations dealing with sets of services on a particular Server.
*/
// Enumerators take the implicit values 0..4 in declaration order.
// RequestCommon::service holds one of these as a uint16_t.
enum ServiceType {
MASTER_SERVICE,
BACKUP_SERVICE,
COORDINATOR_SERVICE,
ADMIN_SERVICE,
INVALID_SERVICE, // One higher than the max.
};
/**
* Used in linearizable RPCs to check whether or not the RPC can be processed.
*/
// Packed: three uint64_t fields, 24 bytes with no padding.
struct ClientLease {
uint64_t leaseId; /// A cluster unique id for a specific lease.
/// 0 is used to indicate invalid or expired id.
uint64_t leaseExpiration; /// Cluster time after which the lease may have
/// become invalid.
uint64_t timestamp; /// Cluster time when this lease information was
/// provided by the coordinator.
} __attribute__((packed));
/**
* This enum defines the choices for the "opcode" field in RPC
* headers, which selects a particular operation to perform. Each
* RAMCloud service implements a subset of these operations. If you
* change this table you must also reflect the changes in the following
* locations:
* - The method opcodeSymbol in WireFormat.cc.
* - WireFormatTest.cc's out-of-range test, if ILLEGAL_RPC_TYPE was changed.
* - You may need to modify the "callees" table in scripts/genLevels.py,
* which keeps track of which RPCs invoke which other RPCs.
*/
// Values are explicit and not contiguous: 27, 30, 34, 37, 38, 41 and 54 are
// not assigned. RequestCommon::opcode holds one of these as a uint16_t.
// In this file main() only acts on WRITE; every other opcode (including
// ILLEGAL_RPC_TYPE, which it treats as a termination packet) ends its loop.
enum Opcode {
PING = 7,
PROXY_PING = 8,
KILL = 9,
CREATE_TABLE = 10,
GET_TABLE_ID = 11,
DROP_TABLE = 12,
READ = 13,
WRITE = 14,
REMOVE = 15,
ENLIST_SERVER = 16,
GET_SERVER_LIST = 17,
GET_TABLE_CONFIG = 18,
RECOVER = 19,
HINT_SERVER_CRASHED = 20,
RECOVERY_MASTER_FINISHED = 21,
ENUMERATE = 22,
SET_MASTER_RECOVERY_INFO = 23,
FILL_WITH_TEST_DATA = 24,
MULTI_OP = 25,
GET_METRICS = 26,
BACKUP_FREE = 28,
BACKUP_GETRECOVERYDATA = 29,
BACKUP_STARTREADINGDATA = 31,
BACKUP_WRITE = 32,
BACKUP_RECOVERYCOMPLETE = 33,
UPDATE_SERVER_LIST = 35,
BACKUP_STARTPARTITION = 36,
DROP_TABLET_OWNERSHIP = 39,
TAKE_TABLET_OWNERSHIP = 40,
GET_HEAD_OF_LOG = 42,
INCREMENT = 43,
PREP_FOR_MIGRATION = 44,
RECEIVE_MIGRATION_DATA = 45,
REASSIGN_TABLET_OWNERSHIP = 46,
MIGRATE_TABLET = 47,
IS_REPLICA_NEEDED = 48,
SPLIT_TABLET = 49,
GET_SERVER_STATISTICS = 50,
SET_RUNTIME_OPTION = 51,
GET_SERVER_CONFIG = 52,
GET_BACKUP_CONFIG = 53,
GET_MASTER_CONFIG = 55,
GET_LOG_METRICS = 56,
VERIFY_MEMBERSHIP = 57,
GET_RUNTIME_OPTION = 58,
GET_LEASE_INFO = 59,
RENEW_LEASE = 60,
SERVER_CONTROL = 61,
SERVER_CONTROL_ALL = 62,
GET_SERVER_ID = 63,
READ_KEYS_AND_VALUE = 64,
LOOKUP_INDEX_KEYS = 65,
READ_HASHES = 66,
INSERT_INDEX_ENTRY = 67,
REMOVE_INDEX_ENTRY = 68,
CREATE_INDEX = 69,
DROP_INDEX = 70,
DROP_INDEXLET_OWNERSHIP = 71,
TAKE_INDEXLET_OWNERSHIP = 72,
PREP_FOR_INDEXLET_MIGRATION = 73,
SPLIT_AND_MIGRATE_INDEXLET = 74,
COORD_SPLIT_AND_MIGRATE_INDEXLET = 75,
TX_DECISION = 76,
TX_PREPARE = 77,
TX_REQUEST_ABORT = 78,
TX_HINT_FAILED = 79,
ECHO = 80,
ILLEGAL_RPC_TYPE = 81, // 1 + the highest legitimate Opcode
};
/**
* Each RPC request starts with this structure.
*/
// Packed: two uint16_t fields, 4 bytes. It is the first member of
// Write::Request and Backup_Write::Request.
struct RequestCommon {
uint16_t opcode; /// Opcode of operation to be performed.
uint16_t service; /// ServiceType to invoke for this rpc.
} __attribute__((packed));
/**
* Each RPC response starts with this structure.
*/
// NOTE(review): Status is declared without a fixed underlying type, so the
// size of this struct is whatever the compiler picks for that enum — confirm
// that every program exchanging responses is built with the same choice.
struct ResponseCommon {
Status status; // Indicates whether the operation
// succeeded; if not, it explains why.
} __attribute__((packed));
/**
* This struct describes the packet structure of a write packet
* key differences include the addition of the field uint64_t timestamp;
* to help measure latencies and the ifdef elsedef structure to allow the user
* to set the packet size using commandline macros at compile time
* default size is 100 bytes
*/
struct Write {
static const Opcode opcode = WRITE;
static const ServiceType service = MASTER_SERVICE;
struct Request {
RequestCommon common;
uint64_t tableId;
uint64_t key;
uint64_t timestamp;
ClientLease lease;
uint64_t rpcId;
uint64_t ackId;
uint32_t length; // Includes the total size of the
// keysAndValue blob in bytes.These
// follow immediately after this header
RejectRules rejectRules;
uint8_t async;
uint8_t array[15];
#ifdef P1000
uint64_t array2[112];
uint8_t array3[4];
#endif
#ifdef P10000
uint64_t array2[112];
uint8_t array3[4];
uint64_t array4[1125];
#endif
} __attribute__((packed));
struct Response {
ResponseCommon common;
uint64_t tableId;
uint64_t key;
uint64_t version;
uint64_t timestamp;
uint64_t array[8];
#ifdef P1000
uint64_t array2[112];
uint8_t array3[4];
#endif
#ifdef P10000
uint64_t array2[112];
uint8_t array3[4];
uint64_t array4[1125];
#endif
} __attribute__((packed));
};
struct Backup_Write {
static const Opcode opcode = BACKUP_WRITE;
static const ServiceType service = BACKUP_SERVICE;
struct Request {
RequestCommon common;
uint64_t tableId;
uint64_t key;
uint64_t timestamp;
ClientLease lease;
uint64_t rpcId;
uint64_t ackId;
uint32_t length; // Includes the total size of the
// keysAndValue blob in bytes.These
// follow immediately after this header
RejectRules rejectRules;
uint8_t async;
uint8_t array[15];
#ifdef P1000
uint64_t array2[112];
uint8_t array3[4];
#endif
#ifdef P10000
uint64_t array2[112];
uint8_t array3[4];
uint64_t array4[1125];
#endif
} __attribute__((packed));
struct Response {
ResponseCommon common;
uint64_t tableId;
uint64_t key;
uint64_t version;
uint64_t timestamp;
uint64_t array[8];
#ifdef P1000
uint64_t array2[112];
uint8_t array3[4];
#endif
#ifdef P10000
uint64_t array2[112];
uint8_t array3[4];
uint64_t array4[1125];
#endif
} __attribute__((packed));
};
int main(int argc, char const *argv[])
{
int sock = 0, valread;
struct sockaddr_in serv_addr;
char *hello = "Master preprocessing done";
char buffer1[1024] = {0};
struct sockaddr_in address;
int opt = 1;
int addrlen = sizeof(address);
if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0)
{
printf("\n Socket creation error \n");
return -1;
}
serv_addr.sin_family = AF_INET;
serv_addr.sin_port = htons(PORT);
// Convert IPv4 and IPv6 addresses from text to binary form
if(inet_pton(AF_INET, "10.129.2.181", &serv_addr.sin_addr)<=0)//192.168.200.21//10.129.2.181
{
printf("\nInvalid address/ Address not supported \n");
return -1;
}
if (connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0)
{
printf("\nConnection Failed \n");
return -1;
}
int sd, max_sd;
int csd, msd;
int max_clients = 1;
int activity;
int client_socket=0;
// valread = read( sock , buffer1, 1024);
// if(valread>0)
// {
// printf("%s\n",buffer1 );
// memset(&buffer1[0], 0, sizeof(buffer1));
// }
//setting up master's DRAM before responding to the synchronisation packet
for(int i = 0; i<2000000;i++)
{
master_data.insert({"1$"+to_string(i),1});
}
// if( send(sock, hello, strlen(hello), 0) != strlen(hello) )
// {
// perror("send");
// }
// valread = read( sock , buffer1, 1024);
// if(valread>0)
// {
// printf("%s\n",buffer1 );
// memset(&buffer1[0], 0, sizeof(buffer1));
// }
while(1)
{
//clear the socket set
FD_ZERO(&readfds);
//add master socket to set
FD_SET(sock, &readfds);
max_sd = sock;
//socket descriptor
sd = client_socket;
//if valid socket descriptor then add to read list
if(sd > 0)
FD_SET( sd , &readfds);
//highest file descriptor number, need it for the select function
if(sd > max_sd)
max_sd = sd;
//wait for an activity on one of the sockets , timeout is NULL ,
//so wait indefinitely
activity = select( max_sd + 1 , &readfds , NULL , NULL , NULL);
if ((activity < 0) && (errno!=EINTR))
{
printf("select error");
}
//If something happened on the master socket ,
if (FD_ISSET(sock, &readfds))
{
int n = 0;
static char buffer[2 * M] = {0};
n = read(sock, buffer, 2 * M);
if(n>0)
{
// The write request received is stored in this struct
struct Write::Request w1;
memcpy(&w1, buffer, sizeof(w1));
int a = w1.common.opcode;
D(printf("Opcode:%d\n",a ));
//comparing the opcode to ensure that only write packets are processed by the offload
//and all other packets pass through
if(a==WRITE)
{
struct Write::Request w;
memcpy(&w, buffer, sizeof(w));
//debugging
D(printf("tableId:%lu\n",w.tableId ));
D(printf("key:%lu\n",w.key));
string s ="";
//Create the key for the hashmap by concatenating
//the tableId and the key in the write packet
s=s+to_string(w.tableId)+"$"+to_string(w.key);
D(printf("HashKey:%s\n",s.c_str()));
//Master checks the reject rules to respond with failure
//if operation is atomic and there is a version number mismatch
if(w.rejectRules.versionNeGiven)
{
string s ="";
//Create the key for the hashmap by concatenating
//the tableId and the key in the write packet
s=s+to_string(w.tableId)+"$"+to_string(w.key);
D(cout<<s<<"\n");
if (master_data.find(s) != master_data.end())
{
D(std::cout << "Key found\n");
uint64_t curr_version_number = master_data[s];
//compare curr_version_number with version number in w
if(w.rejectRules.givenVersion!=curr_version_number)
{
D(std::cout << "version number doesn't match\n");
//raise failure response
struct Write::Response wr;
wr.common.status=STATUS_WRONG_VERSION;
wr.tableId=w.tableId;
wr.key=w.key;
wr.version=curr_version_number;
wr.timestamp=w.timestamp;
send(sock , &wr , sizeof(struct Write::Response), 0 );
D(printf("%s\n"," master raised failure response" ));
}
else
{
D(std::cout << "version number matches\n");
//update version number in master
master_data[s]=master_data[s]+(uint64_t)1;
D(printf("Updated master_data Key:%s Version:%lu\n",s.c_str(), master_data[s]));
//raise success response
struct Write::Response wr;
wr.common.status=STATUS_OK;
wr.tableId=w.tableId;
wr.key=w.key;
wr.version=master_data[s];
wr.timestamp=w.timestamp;
//storing paramters for debugging purposes
int stat = wr.common.status;
int table = wr.tableId;
int key = wr.key;
int vers = wr.version;
uint64_t ts = wr.timestamp;
D(printf("Master sent response Table:%d Key:%d Version:%d Status:%d Timestamp:%" PRIu64 "\n",table, key, vers, stat, ts ));
chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
uint64_t ts2 = ms.count();
D(printf("Master sent response at Timestamp:%" PRIu64 "\n", ts2 ));
// printf("Master sent hashmap Key:%s Version:%lu\n",s.c_str(), vers);
send(sock , &wr , sizeof(struct Write::Response), 0 );
D(printf("%s\n","master raised success response" ));
}
}
else
{
D(std::cout << "Key not found in master data\n");
struct Write::Response wr;
//raise failure response since object does not exist in master's DRAM
wr.common.status=STATUS_OBJECT_DOESNT_EXIST;
wr.version=1;
wr.tableId=w.tableId;
wr.key=w.key;
wr.timestamp=w.timestamp;
send(sock , &wr , sizeof(struct Write::Response), 0 );
D(printf("%s\n","object doesn't exist in master, sent auto failure" ));
}
}
else
{
//raise success response
struct Write::Response wr;
wr.common.status=STATUS_OK;
wr.version=1;
wr.tableId=w.tableId;
wr.key=w.key;
wr.timestamp=w.timestamp;
send(sock , &wr , sizeof(struct Write::Response), 0 );
D(printf("%s\n","non transaction packet, sent auto success\n" ));
}
}
//termination packet for debugging
else if(a==ILLEGAL_RPC_TYPE)
{
struct Write::Response wr;
wr.common.status=STATUS_MAX_VALUE;
wr.version=1;
wr.tableId=w1.tableId;
wr.key=w1.key;
wr.timestamp=w1.timestamp;
send(sock , &wr , sizeof(struct Write::Response), 0 );
chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
uint64_t ts2 = ms.count();
D(printf("Master sent response at Timestamp:%" PRIu64 "\n", ts2 ));
D(printf("%s\n","testing\n" ));
break;
}
// else error
else
{
struct Write::Response wr;
wr.common.status=STATUS_MAX_VALUE;
wr.version=1;
wr.tableId=w1.tableId;
wr.key=w1.key;
wr.timestamp=w1.timestamp;
send(sock , &wr , sizeof(struct Write::Response), 0 );
chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
uint64_t ts2 = ms.count();
D(printf("Master sent response at Timestamp:%" PRIu64 "\n", ts2 ));
D(printf("%s\n","testing\n" ));
break;
}
}
}
}
return 0;
}
#include <unistd.h>
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <thread>
#include <vector>
#include <mutex>
#include <chrono>
#include <random>
#include <unordered_map>
#include <utility>
#include <inttypes.h>
// TCP port main() connects to.
#define PORT 9090
// Unit and size constants. Every composite expansion is parenthesised so the
// macro behaves as a single value inside a larger expression (for example
// `bytes / M` or `usec / MICRO`); without the parentheses those expressions
// would be regrouped by operator precedence.
#define MILLIS 1000
#define MICRO (MILLIS * 1000)
#define MAX_TARGET 4
#define K 1024
#define M (K * 1024)
#define G (M * 1024)
// in order to turn on debugging, compile with -DDEBUG flag
// D(x) evaluates x when DEBUG is defined and expands to an empty statement
// otherwise, so x must not have side effects the program depends on.
#ifdef DEBUG
#define D(x) (x)
#else
#define D(x) do{}while(0)
#endif
// File-scope declarations used by the code below.
using namespace std;
// unordered_map<string, uint64_t> m;
vector<uint64_t> latv; // vector to store the difference in timestamps for each packet
// Time since epoch in microseconds is typecast to uint64_t
/**
* This enum provides symbolic names for the status values returned
* to applications by RAMCloud operations.
*
* 0 means success; anything else means that an error occurred.
* Not all status values can be returned by all operations.
*/
// Unscoped enum with explicit numeric values. ResponseCommon embeds a Status
// directly, so these numbers are what a response struct carries.
typedef enum Status {
/// Default return value when an operation was successful.
STATUS_OK = 0,
/// Indicates that the server does not know about (and is not responsible
/// for) a given tablet, but that it may exist elsewhere in the system.
/// When it's possible that the tablet exists on another server, this
/// status should be returned (in preference to the definitive
/// TABLE_DOESNT_EXIST).
STATUS_UNKNOWN_TABLET = 1,
/// Indicates that a table does not exist anywhere in the system. At present
/// only the coordinator can say with certainty that a table does not exist.
STATUS_TABLE_DOESNT_EXIST = 2,
/// Indicates that an object does not exist anywhere in the system. Note
/// that unlike with tables there is no UNKNOWN_OBJECT status. This is just
/// because servers will reject operations on objects in unknown tables with
/// a table-related status. If they own a particular tablet, then they can
/// say with certainty if an object exists there or not.
STATUS_OBJECT_DOESNT_EXIST = 3,
STATUS_OBJECT_EXISTS = 4,
STATUS_WRONG_VERSION = 5,
STATUS_NO_TABLE_SPACE = 6,
STATUS_MESSAGE_TOO_SHORT = 7,
STATUS_UNIMPLEMENTED_REQUEST = 8,
STATUS_REQUEST_FORMAT_ERROR = 9,
STATUS_RESPONSE_FORMAT_ERROR = 10,
STATUS_COULDNT_CONNECT = 11,
STATUS_BACKUP_BAD_SEGMENT_ID = 12,
/// Returned by backups when they cannot (or do not wish to) allocate
/// space for a segment replica.
STATUS_BACKUP_OPEN_REJECTED = 13,
STATUS_BACKUP_SEGMENT_OVERFLOW = 14,
STATUS_BACKUP_MALFORMED_SEGMENT = 15,
STATUS_SEGMENT_RECOVERY_FAILED = 16,
/// Indicates that a server is not prepared to handle a request at
/// the present time; the caller should retry at a later time. This
/// status can be returned under many different situations, such as
/// (a) the server is out of resources to execute the request, or
/// (b) the server is not sure it actually has authority to execute
/// the request, and is checking with the coordinator.
STATUS_RETRY = 17,
/// Indicates that the RPC requested an unknown service.
STATUS_SERVICE_NOT_AVAILABLE = 18,
STATUS_TIMEOUT = 19,
/// Indicates that server to which an RPC is directed either never existed,
/// has come and gone, or is currently in crashed state. The server is not
/// in a position to respond to RPCs and probably never will be again
/// (unless the id hasn't yet existed; once a server crashes its id will
/// never be reused).
STATUS_SERVER_NOT_UP = 20,
STATUS_INTERNAL_ERROR = 21,
/// Indicates that the object chosen for an operation does not match the
/// associated requirements. Therefore the chosen object is invalid.
STATUS_INVALID_OBJECT = 22,
/// Indicates that a tablet does not exist. This status is relevant
/// when split or merge operations on tablets are executed.
STATUS_TABLET_DOESNT_EXIST = 23,
/// Indicates that the logic to partition tablets was invoked without a
/// preceding invocation to start reading replicas off of disk.
STATUS_PARTITION_BEFORE_READ = 24,
/// Indicates that an RPC was intended for a particular server id, but
/// was actually sent to a different server id.
STATUS_WRONG_SERVER = 25,
/// Indicates that the server sending an RPC is not present in the
/// server list of the RPC recipient. Used to help servers discover
/// that they are zombies (the rest of the cluster thinks a zombie
/// is dead, but the zombie thinks it's still alive), so they don't
/// continue servicing requests when other servers have already
/// taken over their tablets. See "Zombies" in designNotes.
STATUS_CALLER_NOT_IN_CLUSTER = 26,
/// Indicates that a single request was too big to fit in an rpc and
/// thus could not be sent/carried out.
STATUS_REQUEST_TOO_LARGE = 27,
/// Indicates that the server does not know about (and is not responsible
/// for) a given indexlet, but that it may exist elsewhere in the system.
/// When it's possible that the indexlet exists on another server, this
/// status should be returned (in preference to the definitive
/// INDEX_DOESNT_EXIST).
STATUS_UNKNOWN_INDEXLET = 28,
/// Indicates that an index does not exist anywhere in the system. At
/// present only the coordinator can say with certainty that an index does
/// not exist.
STATUS_INDEX_DOESNT_EXIST = 29,
/// Indicates that a parameter provided by the client is invalid (for
/// example: it is outside allowed bounds).
STATUS_INVALID_PARAMETER = 30,
/// Indicates that client already received the result of the rpc.
/// It does not make sense to execute the RPC again. Most likely cause
/// is a delayed network packet.
STATUS_STALE_RPC = 31,
/// Indicates that the lease of a client is expired on the coordinator.
/// Master refused to execute the RPC with expired lease.
STATUS_EXPIRED_LEASE = 32,
/// Indicates that a client tried to perform transaction operations after
/// the transaction commit had already started.
STATUS_TX_OP_AFTER_COMMIT = 33,
/// Has the same numeric value as STATUS_TX_OP_AFTER_COMMIT, so the two
/// enumerators compare equal.
STATUS_MAX_VALUE = 33,
// NOTE(review): the files named in the checklist below (Status.cc,
// ClientException.h, the Java bindings) are not part of this file — confirm
// whether the checklist still applies here.
// Note: if you add a new status value you must make the following
// additional updates:
// * Modify STATUS_MAX_VALUE to have a value equal to the largest
// defined status value, and make sure its definition is the last one
// in the list. STATUS_MAX_VALUE is used primarily for testing.
// * Add new entries in the tables "messages" and "symbols" in Status.cc.
// * Add a new exception class to ClientException.h
// * Add a new "case" to ClientException::throwException to map from
// the status value to a status-specific ClientException subclass.
// * In the Java bindings, add a static class for the exception to
// ClientException.java
// * Add a case for the status of the exception to throw the exception in
// ClientException.java
// * Add the exception to the Status enum in Status.java, making
// sure the status is in the correct position corresponding to its status
// code.
} Status;
/**
* Used in conditional operations to specify conditions under
* which an operation should be aborted with an error.
*
* RejectRules are typically used to ensure consistency of updates;
* for example, we might want to update a value but only if it hasn't
* changed since the last time we read it. If a RejectRules object
* is passed to an operation, the operation will be aborted if any
* of the following conditions are satisfied:
* - doesntExist is nonzero and the object does not exist
* - exists is nonzero and the object does exist
* - versionLeGiven is nonzero and the object exists with a version
* less than or equal to givenVersion.
* - versionNeGiven is nonzero and the object exists with a version
* different from givenVersion.
*/
// Packed: one uint64_t followed by four uint8_t flags, 12 bytes with no padding.
struct RejectRules {
uint64_t givenVersion; // Version that versionLeGiven / versionNeGiven compare against.
uint8_t doesntExist; // Nonzero: abort if the object does not exist.
uint8_t exists; // Nonzero: abort if the object does exist.
uint8_t versionLeGiven; // Nonzero: abort if the object's version <= givenVersion.
uint8_t versionNeGiven; // Nonzero: abort if the object's version != givenVersion.
} __attribute__((packed));
/**
* Selects the particular service that will handle a given rpc.
* A rpc may only be sent to one particular service; see ServiceMask for
* situations dealing with sets of services on a particular Server.
*/
// Enumerators take the implicit values 0..4 in declaration order.
// RequestCommon::service holds one of these as a uint16_t.
enum ServiceType {
MASTER_SERVICE,
BACKUP_SERVICE,
COORDINATOR_SERVICE,
ADMIN_SERVICE,
INVALID_SERVICE, // One higher than the max.
};
/**
* Used in linearizable RPCs to check whether or not the RPC can be processed.
*/
// Packed: three uint64_t fields, 24 bytes with no padding.
struct ClientLease {
uint64_t leaseId; /// A cluster unique id for a specific lease.
/// 0 is used to indicate invalid or expired id.
uint64_t leaseExpiration; /// Cluster time after which the lease may have
/// become invalid.
uint64_t timestamp; /// Cluster time when this lease information was
/// provided by the coordinator.
} __attribute__((packed));
/**
* This enum defines the choices for the "opcode" field in RPC
* headers, which selects a particular operation to perform. Each
* RAMCloud service implements a subset of these operations. If you
* change this table you must also reflect the changes in the following
* locations:
* - The method opcodeSymbol in WireFormat.cc.
* - WireFormatTest.cc's out-of-range test, if ILLEGAL_RPC_TYPE was changed.
* - You may need to modify the "callees" table in scripts/genLevels.py,
* which keeps track of which RPCs invoke which other RPCs.
*/
// Values are explicit and not contiguous: 27, 30, 34, 37, 38, 41 and 54 are
// not assigned. RequestCommon::opcode holds one of these as a uint16_t.
enum Opcode {
PING = 7,
PROXY_PING = 8,
KILL = 9,
CREATE_TABLE = 10,
GET_TABLE_ID = 11,
DROP_TABLE = 12,
READ = 13,
WRITE = 14,
REMOVE = 15,
ENLIST_SERVER = 16,
GET_SERVER_LIST = 17,
GET_TABLE_CONFIG = 18,
RECOVER = 19,
HINT_SERVER_CRASHED = 20,
RECOVERY_MASTER_FINISHED = 21,
ENUMERATE = 22,
SET_MASTER_RECOVERY_INFO = 23,
FILL_WITH_TEST_DATA = 24,
MULTI_OP = 25,
GET_METRICS = 26,
BACKUP_FREE = 28,
BACKUP_GETRECOVERYDATA = 29,
BACKUP_STARTREADINGDATA = 31,
BACKUP_WRITE = 32,
BACKUP_RECOVERYCOMPLETE = 33,
UPDATE_SERVER_LIST = 35,
BACKUP_STARTPARTITION = 36,
DROP_TABLET_OWNERSHIP = 39,
TAKE_TABLET_OWNERSHIP = 40,
GET_HEAD_OF_LOG = 42,
INCREMENT = 43,
PREP_FOR_MIGRATION = 44,
RECEIVE_MIGRATION_DATA = 45,
REASSIGN_TABLET_OWNERSHIP = 46,
MIGRATE_TABLET = 47,
IS_REPLICA_NEEDED = 48,
SPLIT_TABLET = 49,
GET_SERVER_STATISTICS = 50,
SET_RUNTIME_OPTION = 51,
GET_SERVER_CONFIG = 52,
GET_BACKUP_CONFIG = 53,
GET_MASTER_CONFIG = 55,
GET_LOG_METRICS = 56,
VERIFY_MEMBERSHIP = 57,
GET_RUNTIME_OPTION = 58,
GET_LEASE_INFO = 59,
RENEW_LEASE = 60,
SERVER_CONTROL = 61,
SERVER_CONTROL_ALL = 62,
GET_SERVER_ID = 63,
READ_KEYS_AND_VALUE = 64,
LOOKUP_INDEX_KEYS = 65,
READ_HASHES = 66,
INSERT_INDEX_ENTRY = 67,
REMOVE_INDEX_ENTRY = 68,
CREATE_INDEX = 69,
DROP_INDEX = 70,
DROP_INDEXLET_OWNERSHIP = 71,
TAKE_INDEXLET_OWNERSHIP = 72,
PREP_FOR_INDEXLET_MIGRATION = 73,
SPLIT_AND_MIGRATE_INDEXLET = 74,
COORD_SPLIT_AND_MIGRATE_INDEXLET = 75,
TX_DECISION = 76,
TX_PREPARE = 77,
TX_REQUEST_ABORT = 78,
TX_HINT_FAILED = 79,
ECHO = 80,
ILLEGAL_RPC_TYPE = 81, // 1 + the highest legitimate Opcode
};
/**
* Each RPC request starts with this structure.
*/
// Packed: two uint16_t fields, 4 bytes. It is the first member of
// Write::Request.
struct RequestCommon {
uint16_t opcode; /// Opcode of operation to be performed.
uint16_t service; /// ServiceType to invoke for this rpc.
} __attribute__((packed));
/**
* Each RPC response starts with this structure.
*/
// NOTE(review): Status is declared without a fixed underlying type, so the
// size of this struct is whatever the compiler picks for that enum — confirm
// that every program exchanging responses is built with the same choice.
struct ResponseCommon {
Status status; // Indicates whether the operation
// succeeded; if not, it explains why.
} __attribute__((packed));
/**
* This struct describes the packet structure of a write packet
* key differences include the addition of the field uint64_t timestamp;
* to help measure latencies and the ifdef elsedef structure to allow the user
* to set the packet size using commandline macros at compile time
* default size is 100 bytes
*/
struct Write {
static const Opcode opcode = WRITE;
static const ServiceType service = MASTER_SERVICE;
struct Request {
RequestCommon common;
uint64_t tableId;
uint64_t key;
uint64_t timestamp;
ClientLease lease;
uint64_t rpcId;
uint64_t ackId;
uint32_t length; // Includes the total size of the
// keysAndValue blob in bytes.These
// follow immediately after this header
RejectRules rejectRules;
uint8_t async;
uint8_t array[15];
#ifdef P1000
uint64_t array2[112];
uint8_t array3[4];
#endif
#ifdef P10000
uint64_t array2[112];
uint8_t array3[4];
uint64_t array4[1125];
#endif
} __attribute__((packed));
struct Response {
ResponseCommon common;
uint64_t tableId;
uint64_t key;
uint64_t version;
uint64_t timestamp;
uint64_t array[8];
#ifdef P1000
uint64_t array2[112];
uint8_t array3[4];
#endif
#ifdef P10000
uint64_t array2[112];
uint8_t array3[4];
uint64_t array4[1125];
#endif
} __attribute__((packed));
};
int main(int argc, char const *argv[])
{
// printf("check\n");
// int bypass_offload = atoi(argv[1]); // first commandline argument, if 0,
// //then the code works with SmartNIC offload,
// // else it bypasses the offload
// int proportion = atoi(argv[2]); // second commandline argument, determines what
// //proportion of writes is inconsistent
// // (if proportion = x, then the
// // inconsistent to consistent write ratio is 1:x-1)
int sock = 0, valread;
struct sockaddr_in serv_addr;
char *hello = "Client preprocessing done";
char buffer1[1024] = {0};
if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0)
{
printf("\n Socket creation error \n");
return -1;
}
serv_addr.sin_family = AF_INET;
serv_addr.sin_port = htons(PORT);
// struct timeval tv; // used in case very large packets show abnormal behaviour
// tv.tv_sec = 0;
// tv.tv_usec = 3000;
// if(setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof tv)<0)
// {
// printf("\nsetsockopt failed \n");
// return -1;
// }
// Convert IPv4 and IPv6 addresses from text to binary form
// if(bypass_offload)
// {
// if(inet_pton(AF_INET, "192.168.220.60", &serv_addr.sin_addr)<=0)
// {
// printf("\nInvalid address/ Address not supported \n");
// return -1;
// }
// }
// else
// {
// if(inet_pton(AF_INET, "192.168.220.35", &serv_addr.sin_addr)<=0)
// {
// printf("\nInvalid address/ Address not supported \n");
// return -1;
// }
// }
if(inet_pton(AF_INET, "10.129.2.181", &serv_addr.sin_addr)<=0)//192.168.200.21//10.129.2.181
{
printf("\nInvalid address/ Address not supported \n");
return -1;
}
if (connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0)
{
printf("\nConnection Failed \n");
return -1;
}
// This part of the code synchronises the programs running on the master, client and the NIC
// it ensures that setup delays do not affect the experiments
// valread = read( sock , buffer1, 1024);
// if(valread>0)
// {
// printf("%s\n",buffer1 );
// memset(&buffer1[0], 0, sizeof(buffer1));
// }
// if( send(sock, hello, strlen(hello), 0) != strlen(hello) )
// {
// perror("send");
// }
// valread = read( sock , buffer1, 1024);
// if(valread>0)
// {
// printf("%s\n",buffer1 );
// memset(&buffer1[0], 0, sizeof(buffer1));
// }
int num_replies=0;
static char buffer[2 * M] = {0};
// the main body of the client begins here
struct Write::Request w; // The write request to be sent is generated in this struct
chrono::microseconds msStart = chrono::duration_cast<chrono::milliseconds>(chrono::high_resolution_clock::now().time_since_epoch());
int i = 0;
int success = 0; // These parameters will measure the number of success
int failure = 0; // failure
int error = 0; // and error responses respectively
for(auto start = std::chrono::steady_clock::now(), now = start; now < start + std::chrono::seconds{10}; now = std::chrono::steady_clock::now())
{//the experiment runs for 60 seconds by default
i++;
w.tableId=1; // request workload is synchronised with master
// if(proportion>0) //determines proportion of inconsistent writes
// {
// if(i%proportion==0)
// {
// w.key=0;
// }
// else
// {
// w.key=i;
// }
// }
// else
// {
w.key=i;
// printf("%" PRIu64 "\n",w.key);
// }
//setting the relevant parameters of Write::Request packet
w.common.opcode=WRITE;
w.rejectRules.givenVersion=1;
w.rejectRules.versionNeGiven=1;
//Timestamp debugging
chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
uint64_t ts1 = ms.count();
D(printf("Client sent Timestamp1:%" PRIu64 "\n",ts1 ));
//setting the timestamp of Write::Request packet
w.timestamp=ms.count();
//sending the packet
send(sock , &w , sizeof(struct Write::Request), 0 );
//Debugging
D(printf("Client sent request Timestamp:%" PRIu64 "\n",w.timestamp ));
num_replies++;
D(printf("Write::Request message sent\n"));
// since this client runs in interactive mode, it waits for each response before generating new requests
int n = 0;
n = read(sock, buffer, 2 * M);
if(n>0)
{
// The write response received is stored in this struct
struct Write::Response wr;
//copying the contents of the buffer into the struct
memcpy(&wr, buffer, sizeof(wr));
//storing paramters for debugging purposes
int stat = wr.common.status;
int table = wr.tableId;
int key = wr.key;
int vers = wr.version;
uint64_t ts = wr.timestamp;
chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
uint64_t ts2 = ms.count();
D(printf("Client received Timestamp1:%" PRIu64 "\n",ts ));
D(printf("Client current Timestamp:%" PRIu64 "\n",ts2 ));
// difference in timestamps == RTT for this packet
uint64_t tdelta = ts2 - ts;
D(printf("Client received response Table:%d Key:%d Version:%d Status:%d Timestamp:%" PRIu64 "Time Delta:%" PRIu64 "\n",table, key, vers, stat, ts, tdelta ));
//STATUS_WRONG_VERSION is used for termination packets while debugging
if(stat==STATUS_WRONG_VERSION||stat==STATUS_MAX_VALUE)
{
D(printf("Failure received:%d\n",stat ));
failure++;
latv.push_back(tdelta);
}
//packet is all zeroes, hence error
else if(ts==0)
{
D(printf("Error received:%d\n",stat ));
error++;
}
else if(stat==STATUS_OK)
{
D(printf("Success received:%d\n",stat ));
success++;
latv.push_back(tdelta);
}
else
{
D(printf("Error received:%d\n",stat ));
error++;
}
//Used to generate termination packets while debugging
// if(tdelta>1000000)
// {
// w.common.opcode=ILLEGAL_RPC_TYPE;
// send(sock , &w , sizeof(struct Write::Request), 0 );
// break;
// }
}
memset(&buffer[0], 0, sizeof(buffer));
}
//Printing experiment results to console
//and storing the latency of each packet in latencies_unsorted.txt, latencies.txt
chrono::microseconds msEnd = chrono::duration_cast<chrono::milliseconds>(chrono::high_resolution_clock::now().time_since_epoch());
cout<<"Time taken:"<<msEnd.count() - msStart.count()<<"\n";
cout<<"Packets Received: "<<i<<"\n"<<"Success Received: "<<success<<"\n"<<"Failure Received: "<<failure<<"\n"<<"Error Received: "<<error<<"\n";
std::ofstream outFileUnsorted("latencies_unsorted.txt");
for (const auto &e : latv) outFileUnsorted << e << "\n";
std::sort (latv.begin(), latv.end());
std::ofstream outFile("latencies.txt");
for (const auto &e : latv) outFile << e << "\n";
auto nth = latv.begin() + (99*latv.size())/100;
std::nth_element(latv.begin(), nth, latv.end());
cout<<"Average Latency: "<<1.0 * std::accumulate(latv.begin(), latv.end()-1, 0LL) / std::distance(latv.begin(), latv.end()-1)<<"\n";
cout<<"Tail Latency (99%): "<<1.0 * std::accumulate(nth, latv.end()-1, 0LL) / std::distance(nth, latv.end()-1)<<"\n";
return 0;
}
// Threaded socket server - accepting multiple clients concurrently, by creating
// a new thread for each connecting client.
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include <unistd.h>
#include <stdio.h>
#include <iostream>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <thread>
#include <vector>
#include <mutex>
#include <chrono>
#include <random>
#include <unordered_map>
#include <inttypes.h>
#include <sys/time.h>
#include "utils.h"
// Size multipliers (kibi / mebi / gibi). M and G are parenthesised so that
// they keep their value inside larger expressions such as `x / M` or `x % G`.
#define K 1024
#define M (K * 1024)
#define G (M * 1024)
// D(x) wraps debug-only statements.
// In order to turn on debugging, compile with the -DDEBUG flag.
#ifdef DEBUG
#define D(x) (x)
#else
#define D(x) do{}while(0)
#endif
using namespace std;
// File-descriptor set (the type used by select()).
// NOTE(review): not referenced anywhere else in this server file — confirm it is still needed.
fd_set readfds;
// The hashmap on NIC that stores the most recent version number of each object that is created/updated.
// NOTE(review): not referenced anywhere else in this server file — confirm it is still needed here.
unordered_map<string, uint64_t> m;
// Connected socket to the backup. Assigned once in main() from accept() and
// used by serve_connection() in every connection thread.
int backupsockfd;
/**
* This enum provides symbolic names for the status values returned
* to applications by RAMCloud operations.
*
* 0 means success; anything else means that an error occurred.
* Not all status values can be returned by all operations.
*/
typedef enum Status {
    /// Default return value when an operation was successful.
    STATUS_OK = 0,

    /// The server does not know about (and is not responsible for) a given
    /// tablet, but the tablet may exist elsewhere in the system. Whenever it
    /// is possible that the tablet exists on another server, return this
    /// status in preference to the definitive TABLE_DOESNT_EXIST.
    STATUS_UNKNOWN_TABLET = 1,

    /// A table does not exist anywhere in the system. At present only the
    /// coordinator can say with certainty that a table does not exist.
    STATUS_TABLE_DOESNT_EXIST = 2,

    /// An object does not exist anywhere in the system. Unlike tables there
    /// is no UNKNOWN_OBJECT status, because servers reject operations on
    /// objects in unknown tables with a table-related status. A server that
    /// owns a particular tablet can say with certainty whether an object
    /// exists there or not.
    STATUS_OBJECT_DOESNT_EXIST = 3,

    STATUS_OBJECT_EXISTS = 4,
    STATUS_WRONG_VERSION = 5,
    STATUS_NO_TABLE_SPACE = 6,
    STATUS_MESSAGE_TOO_SHORT = 7,
    STATUS_UNIMPLEMENTED_REQUEST = 8,
    STATUS_REQUEST_FORMAT_ERROR = 9,
    STATUS_RESPONSE_FORMAT_ERROR = 10,
    STATUS_COULDNT_CONNECT = 11,
    STATUS_BACKUP_BAD_SEGMENT_ID = 12,

    /// Returned by backups when they cannot (or do not wish to) allocate
    /// space for a segment replica.
    STATUS_BACKUP_OPEN_REJECTED = 13,

    STATUS_BACKUP_SEGMENT_OVERFLOW = 14,
    STATUS_BACKUP_MALFORMED_SEGMENT = 15,
    STATUS_SEGMENT_RECOVERY_FAILED = 16,

    /// The server is not prepared to handle a request at the present time;
    /// the caller should retry later. This status can be returned in many
    /// different situations, such as
    /// (a) the server is out of resources to execute the request, or
    /// (b) the server is not sure it actually has authority to execute
    ///     the request, and is checking with the coordinator.
    STATUS_RETRY = 17,

    /// The RPC requested an unknown service.
    STATUS_SERVICE_NOT_AVAILABLE = 18,

    STATUS_TIMEOUT = 19,

    /// The server to which an RPC is directed either never existed, has come
    /// and gone, or is currently in crashed state. The server is not in a
    /// position to respond to RPCs and probably never will be again (unless
    /// the id hasn't yet existed; once a server crashes its id will never be
    /// reused).
    STATUS_SERVER_NOT_UP = 20,

    STATUS_INTERNAL_ERROR = 21,

    /// The object chosen for an operation does not match the associated
    /// requirements; the chosen object is therefore invalid.
    STATUS_INVALID_OBJECT = 22,

    /// A tablet does not exist. This status is of relevance when split or
    /// merge operations on tablets are executed.
    STATUS_TABLET_DOESNT_EXIST = 23,

    /// The logic to partition tablets was invoked without a preceding
    /// invocation to start reading replicas off of disk.
    STATUS_PARTITION_BEFORE_READ = 24,

    /// An RPC was intended for a particular server id, but was actually sent
    /// to a different server id.
    STATUS_WRONG_SERVER = 25,

    /// The server sending an RPC is not present in the server list of the
    /// RPC recipient. Used to help servers discover that they are zombies
    /// (the rest of the cluster thinks a zombie is dead, but the zombie
    /// thinks it's still alive), so they don't continue servicing requests
    /// when other servers have already taken over their tablets.
    /// See "Zombies" in designNotes.
    STATUS_CALLER_NOT_IN_CLUSTER = 26,

    /// A single request was too big to fit in an rpc and thus could not be
    /// sent/carried out.
    STATUS_REQUEST_TOO_LARGE = 27,

    /// The server does not know about (and is not responsible for) a given
    /// indexlet, but the indexlet may exist elsewhere in the system. Whenever
    /// it is possible that the indexlet exists on another server, return this
    /// status in preference to the definitive INDEX_DOESNT_EXIST.
    STATUS_UNKNOWN_INDEXLET = 28,

    /// An index does not exist anywhere in the system. At present only the
    /// coordinator can say with certainty that an index does not exist.
    STATUS_INDEX_DOESNT_EXIST = 29,

    /// A parameter provided by the client is invalid (for example: it is
    /// outside allowed bounds).
    STATUS_INVALID_PARAMETER = 30,

    /// The client already received the result of the rpc, so it does not
    /// make sense to execute the RPC again. Most likely cause is a delayed
    /// network packet.
    STATUS_STALE_RPC = 31,

    /// The lease of a client is expired on the coordinator. The master
    /// refused to execute the RPC with the expired lease.
    STATUS_EXPIRED_LEASE = 32,

    /// A client tried to perform transaction operations after the
    /// transaction commit had already started.
    STATUS_TX_OP_AFTER_COMMIT = 33,

    STATUS_MAX_VALUE = 33,

    // Note: if you add a new status value you must make the following
    // additional updates:
    // * Modify STATUS_MAX_VALUE to have a value equal to the largest
    //   defined status value, and make sure its definition is the last one
    //   in the list. STATUS_MAX_VALUE is used primarily for testing.
    // * Add new entries in the tables "messages" and "symbols" in Status.cc.
    // * Add a new exception class to ClientException.h
    // * Add a new "case" to ClientException::throwException to map from
    //   the status value to a status-specific ClientException subclass.
    // * In the Java bindings, add a static class for the exception to
    //   ClientException.java
    // * Add a case for the status of the exception to throw the exception in
    //   ClientException.java
    // * Add the exception to the Status enum in Status.java, making
    //   sure the status is in the correct position corresponding to its
    //   status code.
} Status;
/**
* Used in conditional operations to specify conditions under
* which an operation should be aborted with an error.
*
* RejectRules are typically used to ensure consistency of updates;
* for example, we might want to update a value but only if it hasn't
* changed since the last time we read it. If a RejectRules object
* is passed to an operation, the operation will be aborted if any
* of the following conditions are satisfied:
* - doesntExist is nonzero and the object does not exist
* - exists is nonzero and the object does exist
* - versionLeGiven is nonzero and the object exists with a version
* less than or equal to givenVersion.
* - versionNeGiven is nonzero and the object exists with a version
* different from givenVersion.
*/
struct RejectRules {
    /// Version that versionLeGiven / versionNeGiven compare against.
    uint64_t givenVersion;
    /// Nonzero: abort if the object does not exist.
    uint8_t doesntExist;
    /// Nonzero: abort if the object does exist.
    uint8_t exists;
    /// Nonzero: abort if the object exists with a version less than or
    /// equal to givenVersion.
    uint8_t versionLeGiven;
    /// Nonzero: abort if the object exists with a version different from
    /// givenVersion.
    uint8_t versionNeGiven;
} __attribute__((packed));
/**
* Selects the particular service that will handle a given rpc.
* A rpc may only be sent to one particular service; see ServiceMask for
* situations dealing with sets of services on a particular Server.
*/
enum ServiceType {
    MASTER_SERVICE,
    BACKUP_SERVICE,
    COORDINATOR_SERVICE,
    ADMIN_SERVICE,
    /// Sentinel: one higher than the largest valid service value.
    INVALID_SERVICE,
};
/**
* Used in linearizable RPCs to check whether or not the RPC can be processed.
*/
struct ClientLease {
    /// A cluster unique id for a specific lease.
    /// 0 is used to indicate an invalid or expired id.
    uint64_t leaseId;
    /// Cluster time after which the lease may have become invalid.
    uint64_t leaseExpiration;
    /// Cluster time when this lease information was provided by the
    /// coordinator.
    uint64_t timestamp;
} __attribute__((packed));
/**
* This enum defines the choices for the "opcode" field in RPC
* headers, which selects a particular operation to perform. Each
* RAMCloud service implements a subset of these operations. If you
* change this table you must also reflect the changes in the following
* locations:
* - The method opcodeSymbol in WireFormat.cc.
* - WireFormatTest.cc's out-of-range test, if ILLEGAL_RPC_TYPE was changed.
* - You may need to modify the "callees" table in scripts/genLevels.py,
* which keeps track of which RPCs invoke which other RPCs.
*/
enum Opcode {
    PING                             = 7,
    PROXY_PING                       = 8,
    KILL                             = 9,
    CREATE_TABLE                     = 10,
    GET_TABLE_ID                     = 11,
    DROP_TABLE                       = 12,
    READ                             = 13,
    WRITE                            = 14,
    REMOVE                           = 15,
    ENLIST_SERVER                    = 16,
    GET_SERVER_LIST                  = 17,
    GET_TABLE_CONFIG                 = 18,
    RECOVER                          = 19,
    HINT_SERVER_CRASHED              = 20,
    RECOVERY_MASTER_FINISHED         = 21,
    ENUMERATE                        = 22,
    SET_MASTER_RECOVERY_INFO         = 23,
    FILL_WITH_TEST_DATA              = 24,
    MULTI_OP                         = 25,
    GET_METRICS                      = 26,
    BACKUP_FREE                      = 28,
    BACKUP_GETRECOVERYDATA           = 29,
    BACKUP_STARTREADINGDATA          = 31,
    BACKUP_WRITE                     = 32,
    BACKUP_RECOVERYCOMPLETE          = 33,
    UPDATE_SERVER_LIST               = 35,
    BACKUP_STARTPARTITION            = 36,
    DROP_TABLET_OWNERSHIP            = 39,
    TAKE_TABLET_OWNERSHIP            = 40,
    GET_HEAD_OF_LOG                  = 42,
    INCREMENT                        = 43,
    PREP_FOR_MIGRATION               = 44,
    RECEIVE_MIGRATION_DATA           = 45,
    REASSIGN_TABLET_OWNERSHIP        = 46,
    MIGRATE_TABLET                   = 47,
    IS_REPLICA_NEEDED                = 48,
    SPLIT_TABLET                     = 49,
    GET_SERVER_STATISTICS            = 50,
    SET_RUNTIME_OPTION               = 51,
    GET_SERVER_CONFIG                = 52,
    GET_BACKUP_CONFIG                = 53,
    GET_MASTER_CONFIG                = 55,
    GET_LOG_METRICS                  = 56,
    VERIFY_MEMBERSHIP                = 57,
    GET_RUNTIME_OPTION               = 58,
    GET_LEASE_INFO                   = 59,
    RENEW_LEASE                      = 60,
    SERVER_CONTROL                   = 61,
    SERVER_CONTROL_ALL               = 62,
    GET_SERVER_ID                    = 63,
    READ_KEYS_AND_VALUE              = 64,
    LOOKUP_INDEX_KEYS                = 65,
    READ_HASHES                      = 66,
    INSERT_INDEX_ENTRY               = 67,
    REMOVE_INDEX_ENTRY               = 68,
    CREATE_INDEX                     = 69,
    DROP_INDEX                       = 70,
    DROP_INDEXLET_OWNERSHIP          = 71,
    TAKE_INDEXLET_OWNERSHIP          = 72,
    PREP_FOR_INDEXLET_MIGRATION      = 73,
    SPLIT_AND_MIGRATE_INDEXLET       = 74,
    COORD_SPLIT_AND_MIGRATE_INDEXLET = 75,
    TX_DECISION                      = 76,
    TX_PREPARE                       = 77,
    TX_REQUEST_ABORT                 = 78,
    TX_HINT_FAILED                   = 79,
    ECHO                             = 80,
    /// Sentinel: 1 + the highest legitimate Opcode.
    ILLEGAL_RPC_TYPE                 = 81,
};
/**
* Each RPC request starts with this structure.
*/
struct RequestCommon {
    /// Opcode of the operation to be performed.
    uint16_t opcode;
    /// ServiceType that should handle this rpc.
    uint16_t service;
} __attribute__((packed));
/**
* Each RPC response starts with this structure.
*/
struct ResponseCommon {
    /// Outcome of the operation: reports success, or explains the failure.
    Status status;
} __attribute__((packed));
/**
* This struct describes the packet structure of a write packet.
* Key differences include the addition of the field uint64_t timestamp,
* which helps measure latencies, and the #ifdef blocks that allow the user
* to set the packet size at compile time using command-line macros
* (P1000, P10000). The default size is 100 bytes.
*/
struct Write {
    static const Opcode opcode = WRITE;
    static const ServiceType service = MASTER_SERVICE;

    // Packet size is selected at compile time:
    //   (no macro)  -> base layout only
    //   -DP1000     -> base layout + array2 + array3
    //   -DP10000    -> base layout + array2 + array3 + array4
    // The filler members are declared once each, so defining both P1000 and
    // P10000 no longer produces duplicate-member errors (P10000 wins). The
    // layout for a single macro is unchanged.
    struct Request {
        RequestCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t timestamp;     // Timestamp carried by the request; used for
                                // latency measurement and debug output.
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        uint32_t length;        // Includes the total size of the
                                // keysAndValue blob in bytes. These
                                // follow immediately after this header.
        RejectRules rejectRules;
        uint8_t async;
        uint8_t array[15];      // Filler bytes.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];   // Filler shared by the P1000 and P10000 sizes.
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];  // Additional filler for the P10000 size.
#endif
    } __attribute__((packed));

    struct Response {
        ResponseCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t version;
        uint64_t timestamp;
        uint64_t array[8];      // Filler words.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];   // Filler shared by the P1000 and P10000 sizes.
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];  // Additional filler for the P10000 size.
#endif
    } __attribute__((packed));
};
// Wire layout of a backup-write RPC. The field layout mirrors Write; only the
// opcode/service constants differ.
struct Backup_Write {
    static const Opcode opcode = BACKUP_WRITE;
    static const ServiceType service = BACKUP_SERVICE;

    // Packet size is selected at compile time:
    //   (no macro)  -> base layout only
    //   -DP1000     -> base layout + array2 + array3
    //   -DP10000    -> base layout + array2 + array3 + array4
    // The filler members are declared once each, so defining both P1000 and
    // P10000 no longer produces duplicate-member errors (P10000 wins). The
    // layout for a single macro is unchanged.
    struct Request {
        RequestCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t timestamp;
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        uint32_t length;        // Includes the total size of the
                                // keysAndValue blob in bytes. These
                                // follow immediately after this header.
        RejectRules rejectRules;
        uint8_t async;
        uint8_t array[15];      // Filler bytes.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];   // Filler shared by the P1000 and P10000 sizes.
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];  // Additional filler for the P10000 size.
#endif
    } __attribute__((packed));

    struct Response {
        ResponseCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t version;
        uint64_t timestamp;
        uint64_t array[8];      // Filler words.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];   // Filler shared by the P1000 and P10000 sizes.
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];  // Additional filler for the P10000 size.
#endif
    } __attribute__((packed));
};
// Argument block handed to a connection thread.
typedef struct {
    int sockfd;  // Socket of the accepted client connection.
} thread_config_t;

// Message-processing states.
typedef enum {
    WAIT_FOR_MSG,
    IN_MSG
} ProcessingState;
void serve_connection(int sockfd) {
if (send(sockfd, "*", 1, 0) < 1) {
perror_die("send");
}
//////////////////////////////////////////////////////
static char buffer[2 * M] = {0};
static char buffer2[2 * M] = {0};
while(1)
{ // The write request received is stored in this struct
int valread=0;
if ((valread = read( sockfd , buffer, 2 * M)) == 0)
{
printf("Some business1");
break;
}
// printf("III\n");
struct Write::Request w1;
//copying the contents of the buffer into the struct
memcpy(&w1, buffer, sizeof(w1));
int a = w1.common.opcode;
D(printf("Opcode:%d\n",a ));
//comparing the opcode to ensure that only write packets are processed by the offload
//and all other packets pass through
if(a==WRITE)
{
struct Write::Request w;
memcpy(&w, buffer, sizeof(w));
//debugging
D(printf("tableId:%lu\n",w.tableId ));
D(printf("key:%lu\n",w.key));
//Check the reject rules to confirm
// whether the write packet was for an atomic operation
struct Write::Request br;
// br.common.status=STATUS_OK;
// br.rejectRules.givenVersion=0;
// br.tableId=w.tableId;
// br.key=w.key;
br.timestamp=w.timestamp;
//storing paramters for debugging purposes
// int stat = br.common.status;
// uint64_t vers = br.rejectRules.givenVersion;
// int table = br.tableId;
// int key = br.key;
uint64_t ts = br.timestamp;
D(printf("Master sent response Timestamp:%" PRIu64 "\n", ts ));
chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
uint64_t ts2 = ms.count();
D(printf("Master sent response at Timestamp:%" PRIu64 "\n", ts2 ));
send(backupsockfd , &w , sizeof(struct Write::Request), 0 );
int valread=0;
if ((valread = read( backupsockfd , buffer2, 2 * M)) == 0)
{
printf("Some business2");
}
else
{
// The write response received is stored in this struct
struct Write::Response wr;
memcpy(&wr, buffer2, sizeof(wr));
//debugging
uint64_t ts = wr.timestamp;
D(printf("Server received response Timestamp:%" PRIu64 "\n",ts ));
chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
uint64_t ts2 = ms.count();
D(printf("Server received response at Timestamp:%" PRIu64 "\n", ts2 ));
//send response to client
send(sockfd , &wr , sizeof(struct Write::Response), 0 );
}
}
}
////////////////////////////////
close(sockfd);
}
void* server_thread(void* arg) {
thread_config_t* config = (thread_config_t*)arg;
int sockfd = config->sockfd;
free(config);
// This cast will work for Linux, but in general casting pthread_id to an
// integral type isn't portable.
unsigned long id = (unsigned long)pthread_self();
printf("Thread %lu created to handle connection with socket %d\n", id,
sockfd);
serve_connection(sockfd);
printf("Thread %lu done\n", id);
return 0;
}
/**
 * Server entry point.
 *
 * First listens on port 9095 and blocks until the backup connects; that
 * connection is stored in the global backupsockfd for use by every serving
 * thread. Then listens on the client port (9090, or argv[1] if given) and
 * hands each accepted client to a new detached thread running server_thread.
 *
 * @param argc argument count.
 * @param argv argv[1], if present, is the client port number.
 * @return never returns in normal operation (the accept loop is endless).
 */
int main(int argc, char** argv) {
    setvbuf(stdout, NULL, _IONBF, 0);

    int portnum = 9090;
    const int backupnum = 9095;
    if (argc >= 2) {
        portnum = atoi(argv[1]);
    }
    printf("Serving on port %d\n", portnum);
    fflush(stdout);

    // Wait for the backup to connect before accepting any client.
    int backupfd = listen_inet_socket(backupnum);
    struct sockaddr_in backup_peer_addr;
    socklen_t backup_peer_addr_len = sizeof(backup_peer_addr);
    // backupsockfd is declared global so that all threads have access.
    backupsockfd = accept(backupfd, (struct sockaddr*)&backup_peer_addr,
                          &backup_peer_addr_len);
    if (backupsockfd < 0) {
        perror_die("ERROR on accept");
    }
    report_backup_connected(&backup_peer_addr, backup_peer_addr_len);

    int sockfd = listen_inet_socket(portnum);
    while (1) {
        struct sockaddr_in peer_addr;
        socklen_t peer_addr_len = sizeof(peer_addr);
        int newsockfd =
            accept(sockfd, (struct sockaddr*)&peer_addr, &peer_addr_len);
        if (newsockfd < 0) {
            perror_die("ERROR on accept");
        }
        report_peer_connected(&peer_addr, peer_addr_len);

        thread_config_t* config = (thread_config_t*)malloc(sizeof(*config));
        if (!config) {
            die("OOM");
        }
        config->sockfd = newsockfd;

        pthread_t the_thread;
        // pthread_create reports failure through its return value (an error
        // number), not errno. On failure the thread handle is not valid, so
        // it must not be detached; release what the thread would have owned
        // and keep serving other clients.
        const int rc = pthread_create(&the_thread, NULL, server_thread, config);
        if (rc != 0) {
            fprintf(stderr, "pthread_create: %s\n", strerror(rc));
            free(config);
            close(newsockfd);
            continue;
        }
        // Detach the thread - when it's done, its resources will be cleaned up.
        // Since the main thread lives forever, it will outlive the serving threads.
        pthread_detach(the_thread);
    }
    return 0;
}
\ No newline at end of file
// Threaded socket server - accepting multiple clients concurrently, by creating
// a new thread for each connecting client.
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include <unistd.h>
#include <stdio.h>
#include <iostream>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <thread>
#include <vector>
#include <mutex>
#include <chrono>
#include <random>
#include <unordered_map>
#include <inttypes.h>
#include <sys/time.h>
#include "utils.h"
// Size multipliers (kibi / mebi / gibi). M and G are parenthesised so that
// they keep their value inside larger expressions such as `x / M` or `x % G`.
#define K 1024
#define M (K * 1024)
#define G (M * 1024)
// D(x) wraps debug-only statements.
// In order to turn on debugging, compile with the -DDEBUG flag.
#ifdef DEBUG
#define D(x) (x)
#else
#define D(x) do{}while(0)
#endif
using namespace std;
// File-descriptor set (the type used by select()).
fd_set readfds;
// The hashmap on NIC that stores the most recent version number of each object that is created/updated.
unordered_map<string, uint64_t> m;
// Socket descriptors, presumably for the backup and master connections
// respectively — NOTE(review): their use is outside the visible part of this
// file; confirm.
int backupsockfd;
int mastersockfd;
/**
* This enum provides symbolic names for the status values returned
* to applications by RAMCloud operations.
*
* 0 means success; anything else means that an error occurred.
* Not all status values can be returned by all operations.
*/
typedef enum Status {
    /// Default return value when an operation was successful.
    STATUS_OK = 0,

    /// The server does not know about (and is not responsible for) a given
    /// tablet, but the tablet may exist elsewhere in the system. Whenever it
    /// is possible that the tablet exists on another server, return this
    /// status in preference to the definitive TABLE_DOESNT_EXIST.
    STATUS_UNKNOWN_TABLET = 1,

    /// A table does not exist anywhere in the system. At present only the
    /// coordinator can say with certainty that a table does not exist.
    STATUS_TABLE_DOESNT_EXIST = 2,

    /// An object does not exist anywhere in the system. Unlike tables there
    /// is no UNKNOWN_OBJECT status, because servers reject operations on
    /// objects in unknown tables with a table-related status. A server that
    /// owns a particular tablet can say with certainty whether an object
    /// exists there or not.
    STATUS_OBJECT_DOESNT_EXIST = 3,

    STATUS_OBJECT_EXISTS = 4,
    STATUS_WRONG_VERSION = 5,
    STATUS_NO_TABLE_SPACE = 6,
    STATUS_MESSAGE_TOO_SHORT = 7,
    STATUS_UNIMPLEMENTED_REQUEST = 8,
    STATUS_REQUEST_FORMAT_ERROR = 9,
    STATUS_RESPONSE_FORMAT_ERROR = 10,
    STATUS_COULDNT_CONNECT = 11,
    STATUS_BACKUP_BAD_SEGMENT_ID = 12,

    /// Returned by backups when they cannot (or do not wish to) allocate
    /// space for a segment replica.
    STATUS_BACKUP_OPEN_REJECTED = 13,

    STATUS_BACKUP_SEGMENT_OVERFLOW = 14,
    STATUS_BACKUP_MALFORMED_SEGMENT = 15,
    STATUS_SEGMENT_RECOVERY_FAILED = 16,

    /// The server is not prepared to handle a request at the present time;
    /// the caller should retry later. This status can be returned in many
    /// different situations, such as
    /// (a) the server is out of resources to execute the request, or
    /// (b) the server is not sure it actually has authority to execute
    ///     the request, and is checking with the coordinator.
    STATUS_RETRY = 17,

    /// The RPC requested an unknown service.
    STATUS_SERVICE_NOT_AVAILABLE = 18,

    STATUS_TIMEOUT = 19,

    /// The server to which an RPC is directed either never existed, has come
    /// and gone, or is currently in crashed state. The server is not in a
    /// position to respond to RPCs and probably never will be again (unless
    /// the id hasn't yet existed; once a server crashes its id will never be
    /// reused).
    STATUS_SERVER_NOT_UP = 20,

    STATUS_INTERNAL_ERROR = 21,

    /// The object chosen for an operation does not match the associated
    /// requirements; the chosen object is therefore invalid.
    STATUS_INVALID_OBJECT = 22,

    /// A tablet does not exist. This status is of relevance when split or
    /// merge operations on tablets are executed.
    STATUS_TABLET_DOESNT_EXIST = 23,

    /// The logic to partition tablets was invoked without a preceding
    /// invocation to start reading replicas off of disk.
    STATUS_PARTITION_BEFORE_READ = 24,

    /// An RPC was intended for a particular server id, but was actually sent
    /// to a different server id.
    STATUS_WRONG_SERVER = 25,

    /// The server sending an RPC is not present in the server list of the
    /// RPC recipient. Used to help servers discover that they are zombies
    /// (the rest of the cluster thinks a zombie is dead, but the zombie
    /// thinks it's still alive), so they don't continue servicing requests
    /// when other servers have already taken over their tablets.
    /// See "Zombies" in designNotes.
    STATUS_CALLER_NOT_IN_CLUSTER = 26,

    /// A single request was too big to fit in an rpc and thus could not be
    /// sent/carried out.
    STATUS_REQUEST_TOO_LARGE = 27,

    /// The server does not know about (and is not responsible for) a given
    /// indexlet, but the indexlet may exist elsewhere in the system. Whenever
    /// it is possible that the indexlet exists on another server, return this
    /// status in preference to the definitive INDEX_DOESNT_EXIST.
    STATUS_UNKNOWN_INDEXLET = 28,

    /// An index does not exist anywhere in the system. At present only the
    /// coordinator can say with certainty that an index does not exist.
    STATUS_INDEX_DOESNT_EXIST = 29,

    /// A parameter provided by the client is invalid (for example: it is
    /// outside allowed bounds).
    STATUS_INVALID_PARAMETER = 30,

    /// The client already received the result of the rpc, so it does not
    /// make sense to execute the RPC again. Most likely cause is a delayed
    /// network packet.
    STATUS_STALE_RPC = 31,

    /// The lease of a client is expired on the coordinator. The master
    /// refused to execute the RPC with the expired lease.
    STATUS_EXPIRED_LEASE = 32,

    /// A client tried to perform transaction operations after the
    /// transaction commit had already started.
    STATUS_TX_OP_AFTER_COMMIT = 33,

    STATUS_MAX_VALUE = 33,

    // Note: if you add a new status value you must make the following
    // additional updates:
    // * Modify STATUS_MAX_VALUE to have a value equal to the largest
    //   defined status value, and make sure its definition is the last one
    //   in the list. STATUS_MAX_VALUE is used primarily for testing.
    // * Add new entries in the tables "messages" and "symbols" in Status.cc.
    // * Add a new exception class to ClientException.h
    // * Add a new "case" to ClientException::throwException to map from
    //   the status value to a status-specific ClientException subclass.
    // * In the Java bindings, add a static class for the exception to
    //   ClientException.java
    // * Add a case for the status of the exception to throw the exception in
    //   ClientException.java
    // * Add the exception to the Status enum in Status.java, making
    //   sure the status is in the correct position corresponding to its
    //   status code.
} Status;
/**
* Used in conditional operations to specify conditions under
* which an operation should be aborted with an error.
*
* RejectRules are typically used to ensure consistency of updates;
* for example, we might want to update a value but only if it hasn't
* changed since the last time we read it. If a RejectRules object
* is passed to an operation, the operation will be aborted if any
* of the following conditions are satisfied:
* - doesntExist is nonzero and the object does not exist
* - exists is nonzero and the object does exist
* - versionLeGiven is nonzero and the object exists with a version
* less than or equal to givenVersion.
* - versionNeGiven is nonzero and the object exists with a version
* different from givenVersion.
*/
struct RejectRules {
    /// Version that versionLeGiven / versionNeGiven compare against.
    uint64_t givenVersion;
    /// Nonzero: abort if the object does not exist.
    uint8_t doesntExist;
    /// Nonzero: abort if the object does exist.
    uint8_t exists;
    /// Nonzero: abort if the object exists with a version less than or
    /// equal to givenVersion.
    uint8_t versionLeGiven;
    /// Nonzero: abort if the object exists with a version different from
    /// givenVersion.
    uint8_t versionNeGiven;
} __attribute__((packed));
/**
* Selects the particular service that will handle a given rpc.
* A rpc may only be sent to one particular service; see ServiceMask for
* situations dealing with sets of services on a particular Server.
*/
enum ServiceType {
    MASTER_SERVICE = 0,
    BACKUP_SERVICE = 1,
    COORDINATOR_SERVICE = 2,
    ADMIN_SERVICE = 3,
    // Sentinel, not a real service: one higher than the max.
    INVALID_SERVICE = 4,
};
/**
* Used in linearizable RPCs to check whether or not the RPC can be processed.
*/
struct ClientLease {
    /// A cluster unique id for a specific lease.
    /// 0 is used to indicate invalid or expired id.
    uint64_t leaseId;
    /// Cluster time after which the lease may have become invalid.
    uint64_t leaseExpiration;
    /// Cluster time when this lease information was provided by the
    /// coordinator.
    uint64_t timestamp;
} __attribute__((packed));
/**
* This enum defines the choices for the "opcode" field in RPC
* headers, which selects a particular operation to perform. Each
* RAMCloud service implements a subset of these operations. If you
* change this table you must also reflect the changes in the following
* locations:
* - The method opcodeSymbol in WireFormat.cc.
* - WireFormatTest.cc's out-of-range test, if ILLEGAL_RPC_TYPE was changed.
* - You may need to modify the "callees" table in scripts/genLevels.py,
* which keeps track of which RPCs invoke which other RPCs.
*/
enum Opcode {
    // The numbering is not contiguous: 27, 30, 34, 37, 38, 41 and 54 are
    // not assigned to any opcode in this table.
    PING = 7,
    PROXY_PING = 8,
    KILL = 9,
    CREATE_TABLE = 10,
    GET_TABLE_ID = 11,
    DROP_TABLE = 12,
    READ = 13,
    WRITE = 14,
    REMOVE = 15,
    ENLIST_SERVER = 16,
    GET_SERVER_LIST = 17,
    GET_TABLE_CONFIG = 18,
    RECOVER = 19,
    HINT_SERVER_CRASHED = 20,
    RECOVERY_MASTER_FINISHED = 21,
    ENUMERATE = 22,
    SET_MASTER_RECOVERY_INFO = 23,
    FILL_WITH_TEST_DATA = 24,
    MULTI_OP = 25,
    GET_METRICS = 26,

    BACKUP_FREE = 28,
    BACKUP_GETRECOVERYDATA = 29,

    BACKUP_STARTREADINGDATA = 31,
    BACKUP_WRITE = 32,
    BACKUP_RECOVERYCOMPLETE = 33,

    UPDATE_SERVER_LIST = 35,
    BACKUP_STARTPARTITION = 36,

    DROP_TABLET_OWNERSHIP = 39,
    TAKE_TABLET_OWNERSHIP = 40,

    GET_HEAD_OF_LOG = 42,
    INCREMENT = 43,
    PREP_FOR_MIGRATION = 44,
    RECEIVE_MIGRATION_DATA = 45,
    REASSIGN_TABLET_OWNERSHIP = 46,
    MIGRATE_TABLET = 47,
    IS_REPLICA_NEEDED = 48,
    SPLIT_TABLET = 49,
    GET_SERVER_STATISTICS = 50,
    SET_RUNTIME_OPTION = 51,
    GET_SERVER_CONFIG = 52,
    GET_BACKUP_CONFIG = 53,

    GET_MASTER_CONFIG = 55,
    GET_LOG_METRICS = 56,
    VERIFY_MEMBERSHIP = 57,
    GET_RUNTIME_OPTION = 58,
    GET_LEASE_INFO = 59,
    RENEW_LEASE = 60,
    SERVER_CONTROL = 61,
    SERVER_CONTROL_ALL = 62,
    GET_SERVER_ID = 63,
    READ_KEYS_AND_VALUE = 64,
    LOOKUP_INDEX_KEYS = 65,
    READ_HASHES = 66,
    INSERT_INDEX_ENTRY = 67,
    REMOVE_INDEX_ENTRY = 68,
    CREATE_INDEX = 69,
    DROP_INDEX = 70,
    DROP_INDEXLET_OWNERSHIP = 71,
    TAKE_INDEXLET_OWNERSHIP = 72,
    PREP_FOR_INDEXLET_MIGRATION = 73,
    SPLIT_AND_MIGRATE_INDEXLET = 74,
    COORD_SPLIT_AND_MIGRATE_INDEXLET = 75,
    TX_DECISION = 76,
    TX_PREPARE = 77,
    TX_REQUEST_ABORT = 78,
    TX_HINT_FAILED = 79,
    ECHO = 80,
    // Sentinel: 1 + the highest legitimate Opcode.
    ILLEGAL_RPC_TYPE = 81,
};
/**
* Each RPC request starts with this structure.
*/
struct RequestCommon {
    /// Opcode of operation to be performed.
    uint16_t opcode;
    /// ServiceType to invoke for this rpc.
    uint16_t service;
} __attribute__((packed));
/**
* Each RPC response starts with this structure.
*/
struct ResponseCommon {
    // Indicates whether the operation succeeded; if not, it explains why.
    Status status;
} __attribute__((packed));
/**
 * Wire format of a write RPC: a fixed-size request and a fixed-size response.
 *
 * Both messages carry a uint64_t timestamp field, which is used to measure
 * latencies, and both end in padding arrays whose only purpose is to set the
 * packet size. The size is selected at compile time with command-line macros:
 *   - neither macro defined: 100-byte request (the default)
 *   - -DP1000:  1000-byte request
 *   - -DP10000: 10000-byte request
 * The response carries the same padding; it has the same sizes provided the
 * Status enum occupies 4 bytes (TODO confirm for the target compiler).
 * If both macros are defined, P10000 takes effect (previously this failed to
 * compile because the padding members were declared twice).
 */
struct Write {
    static const Opcode opcode = WRITE;
    static const ServiceType service = MASTER_SERVICE;
    struct Request {
        RequestCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t timestamp;
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        uint32_t length;            // Includes the total size of the
                                    // keysAndValue blob in bytes.These
                                    // follow immediately after this header
        RejectRules rejectRules;
        uint8_t async;
        uint8_t array[15];          // Padding: brings the request to 100 bytes.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];       // Padding: +900 bytes together with array3.
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];      // Padding: +9000 bytes.
#endif
    } __attribute__((packed));
    struct Response {
        ResponseCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t version;
        uint64_t timestamp;
        uint64_t array[8];          // Padding (64 bytes).
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];       // Padding: +900 bytes together with array3.
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];      // Padding: +9000 bytes.
#endif
    } __attribute__((packed));
};
/**
 * Wire format of a backup write RPC. The field layout is the same as the
 * write RPC defined in this file; only the opcode (BACKUP_WRITE) and service
 * (BACKUP_SERVICE) constants differ.
 *
 * The trailing arrays are padding that sets the packet size at compile time:
 * 100-byte request by default, 1000 with -DP1000, 10000 with -DP10000. If
 * both macros are defined, P10000 takes effect (previously this failed to
 * compile because the padding members were declared twice).
 */
struct Backup_Write {
    static const Opcode opcode = BACKUP_WRITE;
    static const ServiceType service = BACKUP_SERVICE;
    struct Request {
        RequestCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t timestamp;
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        uint32_t length;            // Includes the total size of the
                                    // keysAndValue blob in bytes.These
                                    // follow immediately after this header
        RejectRules rejectRules;
        uint8_t async;
        uint8_t array[15];          // Padding: brings the request to 100 bytes.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];       // Padding: +900 bytes together with array3.
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];      // Padding: +9000 bytes.
#endif
    } __attribute__((packed));
    struct Response {
        ResponseCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t version;
        uint64_t timestamp;
        uint64_t array[8];          // Padding (64 bytes).
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];       // Padding: +900 bytes together with array3.
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];      // Padding: +9000 bytes.
#endif
    } __attribute__((packed));
};
// Argument handed to server_thread for one accepted client connection.
// main() allocates it with malloc(); server_thread() frees it.
typedef struct {
    int sockfd;  // Connected client socket returned by accept().
} thread_config_t;
void* master_thread_fn(void* arg) {
// This cast will work for Linux, but in general casting pthread_id to an
// integral type isn't portable.
unsigned long id = (unsigned long)pthread_self();
D(printf("Master thread %lu created \n", id));
static char buffer[2 * M] = {0};
send(mastersockfd , arg , sizeof(struct Write::Request), 0 );
// bool* retval=NULL;
int valread=0;
if ((valread = read( mastersockfd , buffer, 2 * M)) == 0)
{
printf("Some master business2");
}
else
{
// The write response received is stored in this struct
struct Write::Response wr;
memcpy(&wr, buffer, sizeof(wr));
//debugging
// *retval=!wr.common.status;
uint64_t ts = wr.timestamp;
D(printf("Server received response Timestamp:%" PRIu64 "\n",ts ));
chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
uint64_t ts2 = ms.count();
D(printf("Server received response at Timestamp:%" PRIu64 "\n", ts2 ));
}
D(printf("Thread %lu done\n", id));
// int *retval;
// *retval=1;
// return retval;// use clearer logic
return 0;
}
void* backup_thread_fn(void* arg) {
// This cast will work for Linux, but in general casting pthread_id to an
// integral type isn't portable.
unsigned long id = (unsigned long)pthread_self();
D(printf("Backup thread %lu created \n", id));
static char buffer[2 * M] = {0};
send(backupsockfd , arg , sizeof(struct Write::Request), 0 );
int valread=0;
// bool* retval=NULL;
if ((valread = read( backupsockfd , buffer, 2 * M)) == 0)
{
printf("Some backup business2");
}
else
{
// The write response received is stored in this struct
struct Write::Response wr;
memcpy(&wr, buffer, sizeof(wr));
//debugging
// *retval=!wr.common.status;
uint64_t ts = wr.timestamp;
D(printf("Server received response Timestamp:%" PRIu64 "\n",ts ));
chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
uint64_t ts2 = ms.count();
D(printf("Server received response at Timestamp:%" PRIu64 "\n", ts2 ));
}
D(printf("Thread %lu done\n", id));
// int *retval;
// *retval=1;
// return retval;// use clearer logic
return 0;
}
void serve_connection(int sockfd) {
// if (send(sockfd, "*", 1, 0) < 1) {
// perror_die("send");
// }
//////////////////////////////////////////////////////
static char buffer[2 * M] = {0};
static char buffer2[2 * M] = {0};
while(1)
{ // The write request received is stored in this struct
int valread=0;
if ((valread = read( sockfd , buffer, 2 * M)) == 0)
{
printf("Some business1");
break;
}
// printf("III\n");
struct Write::Request w1;
//copying the contents of the buffer into the struct
memcpy(&w1, buffer, sizeof(w1));
int a = w1.common.opcode;
D(printf("Opcode:%d\n",a ));
//comparing the opcode to ensure that only write packets are processed by the offload
//and all other packets pass through
if(a==WRITE)
{
struct Write::Request w;
memcpy(&w, buffer, sizeof(w));
//debugging
D(printf("tableId:%lu\n",w.tableId ));
D(printf("key:%lu\n",w.key));
//Check the reject rules to confirm
// whether the write packet was for an atomic operation
struct Write::Request br;
// br.common.status=STATUS_OK;
// br.rejectRules.givenVersion=0;
// br.tableId=w.tableId;
// br.key=w.key;
br.timestamp=w.timestamp;
//storing paramters for debugging purposes
// int stat = br.common.status;
// uint64_t vers = br.rejectRules.givenVersion;
// int table = br.tableId;
// int key = br.key;
uint64_t ts = br.timestamp;
D(printf("Master sent response Timestamp:%" PRIu64 "\n", ts ));
chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
uint64_t ts2 = ms.count();
D(printf("Master sent response at Timestamp:%" PRIu64 "\n", ts2 ));
pthread_t master_thread;
pthread_t backup_thread;
pthread_create(&master_thread, NULL, master_thread_fn, &w);
pthread_create(&backup_thread, NULL, backup_thread_fn, &w);
// void *master_res;
// void *backup_res;
pthread_join(master_thread, NULL);// &master_res);
D(printf("master joined\n"));
pthread_join(backup_thread, NULL);// &backup_res);
D(printf("backup joined\n"));
//send response to client
// int mre = *(bool*)master_res;
// int bre = *(bool*)backup_res;
// printf("%d %d",mre,bre);
// free(master_res);
// free(backup_res);
// if(mre*bre>0)
// {
struct Write::Response wr;
wr.common.status=STATUS_OK;
wr.timestamp=w.timestamp;
send(sockfd , &wr , sizeof(struct Write::Response), 0 );
// }
// else
// {
// struct Write::Response wr;
// wr.common.status=STATUS_MAX_VALUE;
// wr.timestamp=w.timestamp;
// send(sockfd , &wr , sizeof(struct Write::Response), 0 );
// }
// send(sockfd , &wr , sizeof(struct Write::Response), 0 );
// pthread_join(master_thread[i], NULL);
// D(printf("master joined\n"));
// pthread_join(backup_thread[i], NULL);
// D(printf("backup joined\n"));
// // Detach the thread - when it's done, its resources will be cleaned up.
// // Since the main thread lives forever, it will outlive the serving threads.
// pthread_detach(the_thread);
// send(backupsockfd , &w , sizeof(struct Write::Request), 0 );
// int valread=0;
// if ((valread = read( backupsockfd , buffer2, 2 * M)) == 0)
// {
// printf("Some business2");
// }
// else
// {
// // The write response received is stored in this struct
// struct Write::Response wr;
// memcpy(&wr, buffer2, sizeof(wr));
// //debugging
// uint64_t ts = wr.timestamp;
// D(printf("Server received response Timestamp:%" PRIu64 "\n",ts ));
// chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
// uint64_t ts2 = ms.count();
// D(printf("Server received response at Timestamp:%" PRIu64 "\n", ts2 ));
// //send response to client
// send(sockfd , &wr , sizeof(struct Write::Response), 0 );
// }
}
}
////////////////////////////////
close(sockfd);
}
void* server_thread(void* arg) {
thread_config_t* config = (thread_config_t*)arg;
int sockfd = config->sockfd;
free(config);
// This cast will work for Linux, but in general casting pthread_id to an
// integral type isn't portable.
unsigned long id = (unsigned long)pthread_self();
printf("Thread %lu created to handle connection with socket %d\n", id,
sockfd);
serve_connection(sockfd);
printf("Thread %lu done\n", id);
return 0;
}
/**
 * Entry point of the offload server.
 *
 * Start-up sequence:
 *   1. Wait for exactly one connection on port 9099 and store it in the
 *      global mastersockfd.
 *   2. Wait for exactly one connection on port 9095 and store it in the
 *      global backupsockfd.
 *   3. Listen for clients on the client port (default 9090, overridable with
 *      argv[1]) and serve each accepted connection on its own detached thread.
 *
 * @return Never returns in normal operation; fatal set-up errors terminate
 *         the process through perror_die()/die().
 */
int main(int argc, char** argv) {
    setvbuf(stdout, NULL, _IONBF, 0);
    int portnum = 9090;
    int backupnum = 9095;
    int masternum = 9099;
    if (argc >= 2) {
        portnum = atoi(argv[1]);
    }
    printf("Serving on port %d\n", portnum);
    fflush(stdout);

    int masterfd = listen_inet_socket(masternum);
    struct sockaddr_in master_peer_addr;
    socklen_t master_peer_addr_len = sizeof(master_peer_addr);
    // mastersockfd is declared global so that all threads have access.
    mastersockfd = accept(masterfd, (struct sockaddr*)&master_peer_addr, &master_peer_addr_len);
    if (mastersockfd < 0) {
        perror_die("ERROR on accept");
    }
    // NOTE(review): the master peer is reported through
    // report_backup_connected(), as in the original code.
    report_backup_connected(&master_peer_addr, master_peer_addr_len);
    // Only one master connection is ever accepted; release the listener.
    close(masterfd);

    int backupfd = listen_inet_socket(backupnum);
    struct sockaddr_in backup_peer_addr;
    socklen_t backup_peer_addr_len = sizeof(backup_peer_addr);
    // backupsockfd is declared global so that all threads have access.
    backupsockfd = accept(backupfd, (struct sockaddr*)&backup_peer_addr, &backup_peer_addr_len);
    if (backupsockfd < 0) {
        perror_die("ERROR on accept");
    }
    report_backup_connected(&backup_peer_addr, backup_peer_addr_len);
    // Only one backup connection is ever accepted; release the listener.
    close(backupfd);

    int sockfd = listen_inet_socket(portnum);
    while (1) {
        struct sockaddr_in peer_addr;
        socklen_t peer_addr_len = sizeof(peer_addr);
        int newsockfd =
            accept(sockfd, (struct sockaddr*)&peer_addr, &peer_addr_len);
        if (newsockfd < 0) {
            perror_die("ERROR on accept");
        }
        report_peer_connected(&peer_addr, peer_addr_len);

        thread_config_t* config = (thread_config_t*)malloc(sizeof(*config));
        if (!config) {
            die("OOM");
        }
        config->sockfd = newsockfd;

        pthread_t the_thread;
        const int rc = pthread_create(&the_thread, NULL, server_thread, config);
        if (rc != 0) {
            // No thread took ownership: drop this client instead of leaking
            // its config and socket and detaching an invalid handle.
            fprintf(stderr, "pthread_create failed: %s\n", strerror(rc));
            free(config);
            close(newsockfd);
            continue;
        }
        // Detach the thread - when it's done, its resources will be cleaned up.
        // Since the main thread lives forever, it will outlive the serving threads.
        pthread_detach(the_thread);
    }
    return 0;
}
\ No newline at end of file
#include <unistd.h>
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <thread>
#include <vector>
#include <mutex>
#include <chrono>
#include <random>
#include <unordered_map>
#include <utility>
#include <inttypes.h>
// TCP port this program connects to.
#define PORT 9099
// Unit and size constants. Every compound expansion is parenthesised so the
// macros stay correct inside larger expressions (e.g. `G / M`, `x / MICRO`);
// without the parentheses those evaluate left-to-right and give wrong results.
#define MILLIS 1000
#define MICRO (MILLIS * 1000)
#define MAX_TARGET 4
#define K 1024
#define M (K * 1024)
#define G (M * 1024)
// in order to turn on debugging, compile with -DDEBUG flag
#ifdef DEBUG
#define D(x) (x)
#else
#define D(x) do{}while(0)
#endif
// File-scope state of the master emulator.
using namespace std;
// NOTE(review): `m` is not referenced by the main() in this file; it looks
// like a leftover - confirm before removing.
unordered_map<string, uint64_t> m;
// This hashmap emulates the master's DRAM. main() builds its keys as
// "<tableId>$<key>" and uses the mapped value as the object's version number.
unordered_map<string, uint64_t> master_data; // This hashmap emulates the master's DRAM
// Descriptor set that main() passes to select().
fd_set readfds;
// unordered_map<string, uint64_t> m;
// NOTE(review): `latv` is not referenced by the main() in this file either.
vector<uint64_t> latv; // vector to store the difference in timestamps for each packet
// Time since epoch in microseconds is typecast to uint64_t
/**
* This enum provides symbolic names for the status values returned
* to applications by RAMCloud operations.
*
* 0 means success; anything else means that an error occurred.
* Not all status values can be returned by all operations.
*/
typedef enum Status {
    /// Default return value when an operation was successful.
    STATUS_OK = 0,

    /// Indicates that the server does not know about (and is not responsible
    /// for) a given tablet, but that it may exist elsewhere in the system.
    /// When it's possible that the tablet exists on another server, this
    /// status should be returned (in preference to the definitive
    /// TABLE_DOESNT_EXIST).
    STATUS_UNKNOWN_TABLET = 1,

    /// Indicates that a table does not exist anywhere in the system. At
    /// present only the coordinator can say with certainty that a table does
    /// not exist.
    STATUS_TABLE_DOESNT_EXIST = 2,

    /// Indicates that an object does not exist anywhere in the system. Note
    /// that unlike with tables there is no UNKNOWN_OBJECT status. This is just
    /// because servers will reject operations on objects in unknown tables
    /// with a table-related status. If they own a particular tablet, then they
    /// can say with certainty if an object exists there or not.
    STATUS_OBJECT_DOESNT_EXIST = 3,

    STATUS_OBJECT_EXISTS = 4,
    STATUS_WRONG_VERSION = 5,
    STATUS_NO_TABLE_SPACE = 6,
    STATUS_MESSAGE_TOO_SHORT = 7,
    STATUS_UNIMPLEMENTED_REQUEST = 8,
    STATUS_REQUEST_FORMAT_ERROR = 9,
    STATUS_RESPONSE_FORMAT_ERROR = 10,
    STATUS_COULDNT_CONNECT = 11,
    STATUS_BACKUP_BAD_SEGMENT_ID = 12,

    /// Returned by backups when they cannot (or do not wish to) allocate
    /// space for a segment replica.
    STATUS_BACKUP_OPEN_REJECTED = 13,

    STATUS_BACKUP_SEGMENT_OVERFLOW = 14,
    STATUS_BACKUP_MALFORMED_SEGMENT = 15,
    STATUS_SEGMENT_RECOVERY_FAILED = 16,

    /// Indicates that a server is not prepared to handle a request at
    /// the present time; the caller should retry at a later time. This
    /// status can be returned under many different situations, such as
    /// (a) the server is out of resources to execute the request, or
    /// (b) the server is not sure it actually has authority to execute
    /// the request, and is checking with the coordinator.
    STATUS_RETRY = 17,

    /// Indicates that the RPC requested an unknown service.
    STATUS_SERVICE_NOT_AVAILABLE = 18,

    STATUS_TIMEOUT = 19,

    /// Indicates that server to which an RPC is directed either never existed,
    /// has come and gone, or is currently in crashed state. The server is not
    /// in a position to respond to RPCs and probably never will be again
    /// (unless the id hasn't yet existed; once a server crashes its id will
    /// never be reused).
    STATUS_SERVER_NOT_UP = 20,

    STATUS_INTERNAL_ERROR = 21,

    /// Indicates that the object chosen for an operation does not match the
    /// associated requirements. Therefore the chosen object is invalid.
    STATUS_INVALID_OBJECT = 22,

    /// Indicates that a tablet does not exist. This status is of relevance
    /// when split or merge operations on tablets are executed.
    STATUS_TABLET_DOESNT_EXIST = 23,

    /// Indicates that the logic to partition tablets was invoked without a
    /// preceding invocation to start reading replicas off of disk.
    STATUS_PARTITION_BEFORE_READ = 24,

    /// Indicates that an RPC was intended for a particular server id, but
    /// was actually sent to a different server id.
    STATUS_WRONG_SERVER = 25,

    /// Indicates that the server sending an RPC is not present in the
    /// server list of the RPC recipient. Used to help servers discover
    /// that they are zombies (the rest of the cluster thinks a zombie
    /// is dead, but the zombie thinks it's still alive), so they don't
    /// continue servicing requests when other servers have already
    /// taken over their tablets. See "Zombies" in designNotes.
    STATUS_CALLER_NOT_IN_CLUSTER = 26,

    /// Indicates that a single request was too big to fit in an rpc and
    /// thus could not be sent/carried out.
    STATUS_REQUEST_TOO_LARGE = 27,

    /// Indicates that the server does not know about (and is not responsible
    /// for) a given indexlet, but that it may exist elsewhere in the system.
    /// When it's possible that the indexlet exists on another server, this
    /// status should be returned (in preference to the definitive
    /// INDEX_DOESNT_EXIST).
    STATUS_UNKNOWN_INDEXLET = 28,

    /// Indicates that an index does not exist anywhere in the system. At
    /// present only the coordinator can say with certainty that an index does
    /// not exist.
    STATUS_INDEX_DOESNT_EXIST = 29,

    /// Indicates that a parameter provided by the client is invalid (for
    /// example: it is outside allowed bounds).
    STATUS_INVALID_PARAMETER = 30,

    /// Indicates that client already received the result of the rpc.
    /// It does not make sense to execute the RPC again. Most likely cause
    /// is a delayed network packet.
    STATUS_STALE_RPC = 31,

    /// Indicates that the lease of a client is expired on the coordinator.
    /// Master refused to execute the RPC with expired lease.
    STATUS_EXPIRED_LEASE = 32,

    /// Indicates that a client tried to perform transaction operations after
    /// the transaction commit had already started.
    STATUS_TX_OP_AFTER_COMMIT = 33,

    STATUS_MAX_VALUE = 33,

    // Note: if you add a new status value you must make the following
    // additional updates:
    // * Modify STATUS_MAX_VALUE to have a value equal to the largest
    //   defined status value, and make sure its definition is the last one
    //   in the list. STATUS_MAX_VALUE is used primarily for testing.
    // * Add new entries in the tables "messages" and "symbols" in Status.cc.
    // * Add a new exception class to ClientException.h
    // * Add a new "case" to ClientException::throwException to map from
    //   the status value to a status-specific ClientException subclass.
    // * In the Java bindings, add a static class for the exception to
    //   ClientException.java
    // * Add a case for the status of the exception to throw the exception in
    //   ClientException.java
    // * Add the exception to the Status enum in Status.java, making
    //   sure the status is in the correct position corresponding to its status
    //   code.
} Status;
/**
* Used in conditional operations to specify conditions under
* which an operation should be aborted with an error.
*
* RejectRules are typically used to ensure consistency of updates;
* for example, we might want to update a value but only if it hasn't
* changed since the last time we read it. If a RejectRules object
* is passed to an operation, the operation will be aborted if any
* of the following conditions are satisfied:
* - doesntExist is nonzero and the object does not exist
* - exists is nonzero and the object does exist
* - versionLeGiven is nonzero and the object exists with a version
* less than or equal to givenVersion.
* - versionNeGiven is nonzero and the object exists with a version
* different from givenVersion.
*/
struct RejectRules {
    /// Version that versionLeGiven / versionNeGiven compare against.
    uint64_t givenVersion;
    /// Nonzero: abort if the object does not exist.
    uint8_t doesntExist;
    /// Nonzero: abort if the object does exist.
    uint8_t exists;
    /// Nonzero: abort if the object exists with a version <= givenVersion.
    uint8_t versionLeGiven;
    /// Nonzero: abort if the object exists with a version != givenVersion.
    uint8_t versionNeGiven;
} __attribute__((packed));
/**
* Selects the particular service that will handle a given RPC.
* An RPC may only be sent to one particular service; see ServiceMask for
* situations dealing with sets of services on a particular Server.
*/
enum ServiceType {
    MASTER_SERVICE = 0,
    BACKUP_SERVICE = 1,
    COORDINATOR_SERVICE = 2,
    ADMIN_SERVICE = 3,
    // Sentinel, not a real service: one higher than the max.
    INVALID_SERVICE = 4,
};
/**
* Used in linearizable RPCs to check whether or not the RPC can be processed.
*/
struct ClientLease {
    /// A cluster unique id for a specific lease.
    /// 0 is used to indicate invalid or expired id.
    uint64_t leaseId;
    /// Cluster time after which the lease may have become invalid.
    uint64_t leaseExpiration;
    /// Cluster time when this lease information was provided by the
    /// coordinator.
    uint64_t timestamp;
} __attribute__((packed));
/**
* This enum defines the choices for the "opcode" field in RPC
* headers, which selects a particular operation to perform. Each
* RAMCloud service implements a subset of these operations. If you
* change this table you must also reflect the changes in the following
* locations:
* - The method opcodeSymbol in WireFormat.cc.
* - WireFormatTest.cc's out-of-range test, if ILLEGAL_RPC_TYPE was changed.
* - You may need to modify the "callees" table in scripts/genLevels.py,
* which keeps track of which RPCs invoke which other RPCs.
*/
enum Opcode {
    // The numbering is not contiguous: 27, 30, 34, 37, 38, 41 and 54 are
    // not assigned to any opcode in this table.
    PING = 7,
    PROXY_PING = 8,
    KILL = 9,
    CREATE_TABLE = 10,
    GET_TABLE_ID = 11,
    DROP_TABLE = 12,
    READ = 13,
    WRITE = 14,
    REMOVE = 15,
    ENLIST_SERVER = 16,
    GET_SERVER_LIST = 17,
    GET_TABLE_CONFIG = 18,
    RECOVER = 19,
    HINT_SERVER_CRASHED = 20,
    RECOVERY_MASTER_FINISHED = 21,
    ENUMERATE = 22,
    SET_MASTER_RECOVERY_INFO = 23,
    FILL_WITH_TEST_DATA = 24,
    MULTI_OP = 25,
    GET_METRICS = 26,

    BACKUP_FREE = 28,
    BACKUP_GETRECOVERYDATA = 29,

    BACKUP_STARTREADINGDATA = 31,
    BACKUP_WRITE = 32,
    BACKUP_RECOVERYCOMPLETE = 33,

    UPDATE_SERVER_LIST = 35,
    BACKUP_STARTPARTITION = 36,

    DROP_TABLET_OWNERSHIP = 39,
    TAKE_TABLET_OWNERSHIP = 40,

    GET_HEAD_OF_LOG = 42,
    INCREMENT = 43,
    PREP_FOR_MIGRATION = 44,
    RECEIVE_MIGRATION_DATA = 45,
    REASSIGN_TABLET_OWNERSHIP = 46,
    MIGRATE_TABLET = 47,
    IS_REPLICA_NEEDED = 48,
    SPLIT_TABLET = 49,
    GET_SERVER_STATISTICS = 50,
    SET_RUNTIME_OPTION = 51,
    GET_SERVER_CONFIG = 52,
    GET_BACKUP_CONFIG = 53,

    GET_MASTER_CONFIG = 55,
    GET_LOG_METRICS = 56,
    VERIFY_MEMBERSHIP = 57,
    GET_RUNTIME_OPTION = 58,
    GET_LEASE_INFO = 59,
    RENEW_LEASE = 60,
    SERVER_CONTROL = 61,
    SERVER_CONTROL_ALL = 62,
    GET_SERVER_ID = 63,
    READ_KEYS_AND_VALUE = 64,
    LOOKUP_INDEX_KEYS = 65,
    READ_HASHES = 66,
    INSERT_INDEX_ENTRY = 67,
    REMOVE_INDEX_ENTRY = 68,
    CREATE_INDEX = 69,
    DROP_INDEX = 70,
    DROP_INDEXLET_OWNERSHIP = 71,
    TAKE_INDEXLET_OWNERSHIP = 72,
    PREP_FOR_INDEXLET_MIGRATION = 73,
    SPLIT_AND_MIGRATE_INDEXLET = 74,
    COORD_SPLIT_AND_MIGRATE_INDEXLET = 75,
    TX_DECISION = 76,
    TX_PREPARE = 77,
    TX_REQUEST_ABORT = 78,
    TX_HINT_FAILED = 79,
    ECHO = 80,
    // Sentinel: 1 + the highest legitimate Opcode.
    ILLEGAL_RPC_TYPE = 81,
};
/**
* Each RPC request starts with this structure.
*/
struct RequestCommon {
    /// Opcode of operation to be performed.
    uint16_t opcode;
    /// ServiceType to invoke for this rpc.
    uint16_t service;
} __attribute__((packed));
/**
* Each RPC response starts with this structure.
*/
struct ResponseCommon {
    // Indicates whether the operation succeeded; if not, it explains why.
    Status status;
} __attribute__((packed));
/**
 * Wire format of a write RPC: a fixed-size request and a fixed-size response.
 *
 * Both messages carry a uint64_t timestamp field, which is used to measure
 * latencies, and both end in padding arrays whose only purpose is to set the
 * packet size. The size is selected at compile time with command-line macros:
 *   - neither macro defined: 100-byte request (the default)
 *   - -DP1000:  1000-byte request
 *   - -DP10000: 10000-byte request
 * The response carries the same padding; with the 4-byte-or-smaller Status
 * values used here it has the same sizes provided the Status enum occupies
 * 4 bytes (TODO confirm for the target compiler).
 * If both macros are defined, P10000 takes effect (previously this failed to
 * compile because the padding members were declared twice).
 */
struct Write {
    static const Opcode opcode = WRITE;
    static const ServiceType service = MASTER_SERVICE;
    struct Request {
        RequestCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t timestamp;
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        uint32_t length;            // Includes the total size of the
                                    // keysAndValue blob in bytes.These
                                    // follow immediately after this header
        RejectRules rejectRules;
        uint8_t async;
        uint8_t array[15];          // Padding: brings the request to 100 bytes.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];       // Padding: +900 bytes together with array3.
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];      // Padding: +9000 bytes.
#endif
    } __attribute__((packed));
    struct Response {
        ResponseCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t version;
        uint64_t timestamp;
        uint64_t array[8];          // Padding (64 bytes).
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];       // Padding: +900 bytes together with array3.
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];      // Padding: +9000 bytes.
#endif
    } __attribute__((packed));
};
/**
 * Wire format of a backup write RPC. The field layout is the same as the
 * write RPC defined in this file; only the opcode (BACKUP_WRITE) and service
 * (BACKUP_SERVICE) constants differ.
 *
 * The trailing arrays are padding that sets the packet size at compile time:
 * 100-byte request by default, 1000 with -DP1000, 10000 with -DP10000. If
 * both macros are defined, P10000 takes effect (previously this failed to
 * compile because the padding members were declared twice).
 */
struct Backup_Write {
    static const Opcode opcode = BACKUP_WRITE;
    static const ServiceType service = BACKUP_SERVICE;
    struct Request {
        RequestCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t timestamp;
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        uint32_t length;            // Includes the total size of the
                                    // keysAndValue blob in bytes.These
                                    // follow immediately after this header
        RejectRules rejectRules;
        uint8_t async;
        uint8_t array[15];          // Padding: brings the request to 100 bytes.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];       // Padding: +900 bytes together with array3.
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];      // Padding: +9000 bytes.
#endif
    } __attribute__((packed));
    struct Response {
        ResponseCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t version;
        uint64_t timestamp;
        uint64_t array[8];          // Padding (64 bytes).
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];       // Padding: +900 bytes together with array3.
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];      // Padding: +9000 bytes.
#endif
    } __attribute__((packed));
};
int main(int argc, char const *argv[])
{
int sock = 0, valread;
struct sockaddr_in serv_addr;
char *hello = "Master preprocessing done";
char buffer1[1024] = {0};
struct sockaddr_in address;
int opt = 1;
int addrlen = sizeof(address);
if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0)
{
printf("\n Socket creation error \n");
return -1;
}
serv_addr.sin_family = AF_INET;
serv_addr.sin_port = htons(PORT);
// Convert IPv4 and IPv6 addresses from text to binary form
if(inet_pton(AF_INET, "192.168.200.21", &serv_addr.sin_addr)<=0)//10.129.2.181
{
printf("\nInvalid address/ Address not supported \n");
return -1;
}
if (connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0)
{
printf("\nConnection Failed \n");
return -1;
}
int sd, max_sd;
int csd, msd;
int max_clients = 1;
int activity;
int client_socket=0;
// valread = read( sock , buffer1, 1024);
// if(valread>0)
// {
// printf("%s\n",buffer1 );
// memset(&buffer1[0], 0, sizeof(buffer1));
// }
//setting up master's DRAM before responding to the synchronisation packet
for(int i = 0; i<2000000;i++)
{
master_data.insert({"1$"+to_string(i),1});
}
// if( send(sock, hello, strlen(hello), 0) != strlen(hello) )
// {
// perror("send");
// }
// valread = read( sock , buffer1, 1024);
// if(valread>0)
// {
// printf("%s\n",buffer1 );
// memset(&buffer1[0], 0, sizeof(buffer1));
// }
while(1)
{
//clear the socket set
FD_ZERO(&readfds);
//add master socket to set
FD_SET(sock, &readfds);
max_sd = sock;
//socket descriptor
sd = client_socket;
//if valid socket descriptor then add to read list
if(sd > 0)
FD_SET( sd , &readfds);
//highest file descriptor number, need it for the select function
if(sd > max_sd)
max_sd = sd;
//wait for an activity on one of the sockets , timeout is NULL ,
//so wait indefinitely
activity = select( max_sd + 1 , &readfds , NULL , NULL , NULL);
if ((activity < 0) && (errno!=EINTR))
{
printf("select error");
}
//If something happened on the master socket ,
if (FD_ISSET(sock, &readfds))
{
int n = 0;
static char buffer[2 * M] = {0};
n = read(sock, buffer, 2 * M);
if(n>0)
{
// The write request received is stored in this struct
struct Write::Request w1;
memcpy(&w1, buffer, sizeof(w1));
int a = w1.common.opcode;
D(printf("Opcode:%d\n",a ));
//comparing the opcode to ensure that only write packets are processed by the offload
//and all other packets pass through
if(a==WRITE)
{
struct Write::Request w;
memcpy(&w, buffer, sizeof(w));
//debugging
D(printf("tableId:%lu\n",w.tableId ));
D(printf("key:%lu\n",w.key));
string s ="";
//Create the key for the hashmap by concatenating
//the tableId and the key in the write packet
s=s+to_string(w.tableId)+"$"+to_string(w.key);
D(printf("HashKey:%s\n",s.c_str()));
//Master checks the reject rules to respond with failure
//if operation is atomic and there is a version number mismatch
if(w.rejectRules.versionNeGiven)
{
string s ="";
//Create the key for the hashmap by concatenating
//the tableId and the key in the write packet
s=s+to_string(w.tableId)+"$"+to_string(w.key);
D(cout<<s<<"\n");
if (master_data.find(s) != master_data.end())
{
D(std::cout << "Key found\n");
uint64_t curr_version_number = master_data[s];
//compare curr_version_number with version number in w
if(w.rejectRules.givenVersion!=curr_version_number)
{
D(std::cout << "version number doesn't match\n");
//raise failure response
struct Write::Response wr;
wr.common.status=STATUS_WRONG_VERSION;
wr.tableId=w.tableId;
wr.key=w.key;
wr.version=curr_version_number;
wr.timestamp=w.timestamp;
send(sock , &wr , sizeof(struct Write::Response), 0 );
D(printf("%s\n"," master raised failure response" ));
}
else
{
D(std::cout << "version number matches\n");
//update version number in master
master_data[s]=master_data[s]+(uint64_t)1;
D(printf("Updated master_data Key:%s Version:%lu\n",s.c_str(), master_data[s]));
//raise success response
struct Write::Response wr;
wr.common.status=STATUS_OK;
wr.tableId=w.tableId;
wr.key=w.key;
wr.version=master_data[s];
wr.timestamp=w.timestamp;
//storing paramters for debugging purposes
int stat = wr.common.status;
int table = wr.tableId;
int key = wr.key;
int vers = wr.version;
uint64_t ts = wr.timestamp;
D(printf("Master sent response Table:%d Key:%d Version:%d Status:%d Timestamp:%" PRIu64 "\n",table, key, vers, stat, ts ));
chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
uint64_t ts2 = ms.count();
D(printf("Master sent response at Timestamp:%" PRIu64 "\n", ts2 ));
// printf("Master sent hashmap Key:%s Version:%lu\n",s.c_str(), vers);
send(sock , &wr , sizeof(struct Write::Response), 0 );
D(printf("%s\n","master raised success response" ));
}
}
else
{
D(std::cout << "Key not found in master data\n");
struct Write::Response wr;
//raise failure response since object does not exist in master's DRAM
wr.common.status=STATUS_OBJECT_DOESNT_EXIST;
wr.version=1;
wr.tableId=w.tableId;
wr.key=w.key;
wr.timestamp=w.timestamp;
send(sock , &wr , sizeof(struct Write::Response), 0 );
D(printf("%s\n","object doesn't exist in master, sent auto failure" ));
}
}
else
{
//raise success response
struct Write::Response wr;
wr.common.status=STATUS_OK;
wr.version=1;
wr.tableId=w.tableId;
wr.key=w.key;
wr.timestamp=w.timestamp;
send(sock , &wr , sizeof(struct Write::Response), 0 );
D(printf("%s\n","non transaction packet, sent auto success\n" ));
}
}
//termination packet for debugging
else if(a==ILLEGAL_RPC_TYPE)
{
struct Write::Response wr;
wr.common.status=STATUS_MAX_VALUE;
wr.version=1;
wr.tableId=w1.tableId;
wr.key=w1.key;
wr.timestamp=w1.timestamp;
send(sock , &wr , sizeof(struct Write::Response), 0 );
chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
uint64_t ts2 = ms.count();
D(printf("Master sent response at Timestamp:%" PRIu64 "\n", ts2 ));
D(printf("%s\n","testing\n" ));
break;
}
// else error
else
{
struct Write::Response wr;
wr.common.status=STATUS_MAX_VALUE;
wr.version=1;
wr.tableId=w1.tableId;
wr.key=w1.key;
wr.timestamp=w1.timestamp;
send(sock , &wr , sizeof(struct Write::Response), 0 );
chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
uint64_t ts2 = ms.count();
D(printf("Master sent response at Timestamp:%" PRIu64 "\n", ts2 ));
D(printf("%s\n","testing\n" ));
break;
}
}
}
}
return 0;
}
// Utility functions for socket servers in C.
//
// Eli Bendersky [http://eli.thegreenplace.net]
// This code is in the public domain.
#include "utils.h"
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
// #define _GNU_SOURCE
#include <netdb.h>
#define N_BACKLOG 64
/* Writes a printf-style formatted message plus a trailing newline to stderr
 * and terminates the process with EXIT_FAILURE. Does not return. */
void die(char* fmt, ...) {
    va_list ap;

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);

    fputs("\n", stderr);
    exit(EXIT_FAILURE);
}
/* malloc wrapper: returns the allocated block, or calls die() (which exits
 * the process) when malloc returns NULL. */
void* xmalloc(size_t size) {
    void* block = malloc(size);
    if (block != NULL) {
        return block;
    }
    die("malloc failed");
    return block;
}
// Prints msg followed by the description of the current errno value (via
// perror) and then terminates the process with EXIT_FAILURE. Does not return.
void perror_die(char* msg) {
perror(msg);
exit(EXIT_FAILURE);
}
// Prints "peer (<host>, <service>) connected" to stdout for the address in
// sa (salen is its length, as filled in by accept()). When the address cannot
// be resolved by getnameinfo, prints "peer (unknown) connected" instead.
void report_peer_connected(const struct sockaddr_in* sa, socklen_t salen) {
    char hostbuf[NI_MAXHOST];
    char portbuf[NI_MAXSERV];
    // Keep the pointee const: getnameinfo only reads the address.
    if (getnameinfo((const struct sockaddr*)sa, salen, hostbuf, sizeof(hostbuf),
                    portbuf, sizeof(portbuf), 0) == 0) {
        printf("peer (%s, %s) connected\n", hostbuf, portbuf);
    } else {
        printf("peer (unknown) connected\n");
    }
}
// Prints "backup (<host>, <service>) connected" to stdout for the address in
// sa (salen is its length, as filled in by accept()). When the address cannot
// be resolved by getnameinfo, prints "backup (unknown) connected" instead.
void report_backup_connected(const struct sockaddr_in* sa, socklen_t salen) {
    char hostbuf[NI_MAXHOST];
    char portbuf[NI_MAXSERV];
    // Keep the pointee const: getnameinfo only reads the address.
    if (getnameinfo((const struct sockaddr*)sa, salen, hostbuf, sizeof(hostbuf),
                    portbuf, sizeof(portbuf), 0) == 0) {
        printf("backup (%s, %s) connected\n", hostbuf, portbuf);
    } else {
        printf("backup (unknown) connected\n");
    }
}
// Creates a TCP/IPv4 socket, enables SO_REUSEADDR, binds it to INADDR_ANY on
// portnum and starts listening with a backlog of N_BACKLOG. Returns the
// listening fd; any failing step ends the process through perror_die().
int listen_inet_socket(int portnum) {
    struct sockaddr_in serv_addr;
    int reuse = 1;
    int sockfd;

    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd < 0)
        perror_die("ERROR opening socket");

    // SO_REUSEADDR avoids spurious EADDRINUSE when a previous instance of
    // this server died recently.
    if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0)
        perror_die("setsockopt");

    memset(&serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_port = htons(portnum);
    serv_addr.sin_addr.s_addr = INADDR_ANY;

    if (bind(sockfd, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) < 0)
        perror_die("ERROR on binding");

    if (listen(sockfd, N_BACKLOG) < 0)
        perror_die("ERROR on listen");

    return sockfd;
}
// Adds O_NONBLOCK to the file status flags of sockfd. Either fcntl call
// failing ends the process through perror_die().
void make_socket_non_blocking(int sockfd) {
    int current_flags;

    current_flags = fcntl(sockfd, F_GETFL, 0);
    if (current_flags == -1)
        perror_die("fcntl F_GETFL");

    if (fcntl(sockfd, F_SETFL, current_flags | O_NONBLOCK) == -1)
        perror_die("fcntl F_SETFL O_NONBLOCK");
}
\ No newline at end of file
// Utility functions for socket servers in C.
#ifndef UTILS_H
#define UTILS_H
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>
// Dies (exits with a failure status) after printing the given printf-like
// message, followed by a newline, to stderr.
void die(char* fmt, ...);
// Wraps malloc with error checking: dies if malloc fails.
void* xmalloc(size_t size);
// Dies (exits with a failure status) after printing the current perror status
// prefixed with msg.
void perror_die(char* msg);
// Reports a peer connection to stdout. sa is the data populated by a successful
// accept() call.
void report_peer_connected(const struct sockaddr_in* sa, socklen_t salen);
// Reports a backup connection to stdout. sa is the data populated by a successful
// accept() call.
void report_backup_connected(const struct sockaddr_in* sa, socklen_t salen);
// Creates a bound and listening INET socket on the given port number. Returns
// the socket fd when successful; dies in case of errors.
int listen_inet_socket(int portnum);
// Sets the given socket into non-blocking mode.
void make_socket_non_blocking(int sockfd);
#endif /* UTILS_H */
#ifndef __BUFFER_CC__
#define __BUFFER_CC__
#include <algorithm>
#include <thread>
#include <vector>
#include "Buffer.hpp"
#include "common.hpp"
#include "queue_context.hpp"
/**
 * Builds a buffer with buf_size slots, all marked free (fd_id == -1, not in
 * use, no request or job attached), and initialises the buffer mutex.
 * Exits the process if the mutex cannot be initialised.
 */
Buffer::Buffer(size_t buf_size) {
    this->buffer_size = buf_size;
    this->buffer.resize(buf_size);
    for(size_t i = 0; i < buf_size; i++) {
        this->buffer[i].fd_id = -1;
        this->buffer[i].in_use = false;
        this->buffer[i].cr = NULL;
        // job must start out NULL as well: get_element() copies it and
        // erase_element_by_pos() passes it to free().
        this->buffer[i].job = NULL;
    }
    if(pthread_mutex_init(&(this->buffer_lock), NULL)) {
        perror("Unable to init buffer lock");
        exit(-1);
    }
}
/**
 * Releases every occupied slot: the slot is marked free and its request and
 * job pointers are handed to free() and cleared. Finally destroys the mutex.
 */
Buffer::~Buffer() {
    for(int idx = 0; idx < this->buffer_size; ++idx) {
        Buffer_Element &slot = this->buffer[idx];
        if(slot.fd_id != -1) {
            slot.fd_id = -1;
            free(slot.cr);
            slot.cr = NULL;
            free(slot.job);
            slot.job = NULL;
        }
    }
    pthread_mutex_destroy(&(this->buffer_lock));
}
int Buffer::add_element(int fd, job_context *job, Common_Request *cr) {
bool found = false;
for(int i=0; i<this->buffer_size; i++) {
if(this->buffer[i].fd_id >= 0) continue;
else {
if(job->transport_type == TCP_IP_TRANSPORT) {
this->buffer[i].fd_id = job->tcp_transport->get_conn_fd();
}
else if(job->transport_type == RDMA_RC_TRANSPORT) {
this->buffer[i].fd_id = job->rdma_transport->get_conn_fd();
}
this->buffer[i].cr = cr;
this->buffer[i].job = job;
// this->ready_events[i] = 1;
found = true;
break;
}
}
return found? 0 : -1;
}
void Buffer::erase_element(int fd) {
for(int i=0; i<this->buffer_size; i++) {
if(this->buffer[i].fd_id != fd) continue;
else {
this->buffer[i].fd_id = -1;
this->buffer[i].in_use = false;
this->buffer[i].cr = NULL;
this->buffer[i].job = NULL;
// this->ready_events[i] = -1;
break;
}
}
return;
}
void Buffer::erase_element_by_pos(int pos) {
this->buffer[pos].fd_id = -1;
this->buffer[pos].in_use = false;
free(this->buffer[pos].cr);
this->buffer[pos].cr = NULL;
free(this->buffer[pos].job);
this->buffer[pos].job = NULL;
// this->ready_events[i] = -1;
return;
}
int Buffer::poll(int fd) {
pthread_mutex_lock(&(this->buffer_lock));
int i;
int j = -1;
for(i=0; i<this->buffer_size; i++) {
if(this->buffer[i].fd_id==fd && !this->buffer[i].in_use) {
this->buffer[i].in_use = true;
j = i;
break;
}
}
pthread_mutex_unlock(&(this->buffer_lock));
return j;
}
/**
 * Returns a copy of the slot at index pos. pos is not range-checked.
 */
Buffer_Element Buffer::get_element(int pos) {
    const Buffer_Element &slot = this->buffer[pos];
    return slot;
}
#endif
\ No newline at end of file
#ifndef __BUFFER_H__
#define __BUFFER_H__
#include <thread>
#include <vector>
#include "common.hpp"
#include "queue_context.hpp"
/**
 * One slot of a Buffer. A slot whose fd_id is -1 is free.
 */
struct Buffer_Element {
    int fd_id; // using fd as an id for threads; -1 marks a free slot
    bool in_use; // set while a poller has claimed this slot
    struct Common_Request *cr; // request attached to the slot, or NULL
    struct job_context *job; // job attached to the slot, or NULL
    // Initialise every member so a default-constructed slot can be copied
    // and inspected safely.
    Buffer_Element() : fd_id(-1), in_use(false), cr(NULL), job(NULL) {}
};
// Fixed-size table of Buffer_Element slots keyed by connection fd.
// NOTE(review): among the member functions defined in Buffer.cc only
// poll(int) takes buffer_lock; add/erase/get do not lock - confirm callers
// serialise those themselves.
// NOTE(review): the destructor frees the stored pointers, so copying a Buffer
// would lead to double frees - confirm it is never copied.
class Buffer {
private:
// Guards the slot scan in poll(int).
pthread_mutex_t buffer_lock;
public:
// std::vector<int> ready_events;
// Number of slots; set once by the constructor.
size_t buffer_size;
// The slots themselves; a slot with fd_id == -1 is free.
std::vector<Buffer_Element> buffer;
// Creates buf_size free slots and initialises the lock.
Buffer(size_t buf_size);
// Frees the request/job pointers of occupied slots and destroys the lock.
~Buffer();
// Stores (job, cr) in the first free slot; returns 0 on success, -1 if full.
int add_element(int fd, job_context *job, Common_Request *cr);
// Marks the first slot keyed by fd as free without freeing its pointers.
void erase_element(int fd);
// Marks slot pos as free and frees its request/job pointers.
void erase_element_by_pos(int pos);
// NOTE(review): no definition of this overload is visible in Buffer.cc.
int poll();
// Claims the first not-in-use slot keyed by fd; returns its index or -1.
int poll(int fd);
// Returns a copy of slot pos (no range check).
Buffer_Element get_element(int pos);
};
#endif
\ No newline at end of file
#include <iostream>
#include <thread>
#include <pthread.h>
#include "queue_context.hpp"
#include "threadsafe_queue.hpp"
#include "../transport_api/transport_config.hpp"
using namespace std;
Thread_Safe_Queue q;
void work(int a) {
struct job_context* t = q.get_job();
if(t==NULL) {
cout << "Queue Empty" <<endl;
return;
}
cout<<"Got job"<<endl;
RDMA_config *config = t->rdma_transport->get_config();
cout<<config->mr.mr_size<<endl;
}
int main() {
RDMA_Transport* t = new RDMA_Transport();
cout<<q.queue_size<<endl;
q.enqueue(t, RDMA_RC_WRITE);
cout<<q.queue_size<<endl;
thread th(work, 0);
thread th1(work, 1);
th.join();
th1.join();
return 0;
}
\ No newline at end of file
#ifndef __CLI_API_CC__
#define __CLI_API_CC__
#include <iostream>
#include <string>
#include <vector>
#include "cli_api.hpp"
namespace { // file-local, so strip_whitespaces does not clash with the
            // function of the same name in read_config

/**
 * Returns a copy of str without leading and trailing ' ' characters.
 * Only the space character is stripped; tabs and newlines are kept.
 */
std::string strip_whitespaces(std::string str) {
    std::string::iterator first = str.begin();
    std::string::iterator last = str.end();
    while(first != last && *first == ' ') ++first;
    // Inspect the character before `last`; `last` itself is one past the end.
    while(last != first && *(last - 1) == ' ') --last;
    return std::string(first, last);
}

/**
 * Finishes the token being built: drops trailing '\n' characters, appends the
 * token to tokens when anything is left, and resets it for the next token.
 */
void flush_token(std::vector<std::string>& tokens, std::string& token) {
    while(!token.empty() && token[token.size()-1] == '\n') {
        token.pop_back();
    }
    if(!token.empty()) {
        tokens.push_back(token);
    }
    token.clear();
}

}

/**
 * Splits raw_str into tokens separated by one or more ' ' characters.
 *
 * Leading and trailing spaces are ignored and trailing '\n' characters are
 * removed from each token. Tokens that end up empty are not reported.
 *
 * @param raw_str  Command line to split.
 * @return The tokens in order of appearance; empty when there are none.
 */
std::vector<std::string> tokenize(std::string raw_str) {
    std::vector<std::string> tokens;
    const std::string stripped_str = strip_whitespaces(raw_str);
    std::string current;
    for(std::string::const_iterator it = stripped_str.begin();
        it != stripped_str.end(); ++it) {
        if(*it == ' ') {
            // A run of spaces flushes once; later spaces see an empty token.
            flush_token(tokens, current);
        }
        else {
            current.push_back(*it);
        }
    }
    flush_token(tokens, current);
    return tokens;
}
/**
 * Copies the key out of a "key\0value\0" blob: every character from kv up to,
 * but not including, the first '\0'.
 */
std::string get_key(char *kv) {
    return std::string(kv);
}
/**
 * Copies the value out of a "key\0value\0" blob: skips the key and its
 * terminating '\0', then returns the characters up to the next '\0'.
 *
 * @param kv  Blob holding two consecutive NUL-terminated strings.
 * @return The second string; empty when the value is empty.
 */
std::string get_val(char* kv) {
    const char* cursor = kv;
    while(*cursor != '\0') {
        ++cursor;
    }
    ++cursor; // step over the key's terminator onto the value
    std::string val;
    while(*cursor != '\0') {
        val.push_back(*cursor);
        ++cursor; // advance, otherwise the loop never terminates
    }
    return val;
}
/**
 * Splits a "key\0value\0" blob into a (key, value) pair using get_key and
 * get_val.
 */
std::pair<std::string, std::string> get_kv(char* kv) {
    std::pair<std::string, std::string> result;
    result.first = get_key(kv);
    result.second = get_val(kv);
    return result;
}
/**
 * Returns the start of the key inside a "key\0value\0" blob, which is the
 * blob pointer itself.
 */
char* get_key_ptr(char* blob) {
    char* key_start = blob;
    return key_start;
}
/**
 * Returns the start of the value inside a "key\0value\0" blob: the byte right
 * after the key's terminating '\0'.
 */
char* get_val_ptr(char* blob) {
    char* cursor = blob;
    while(*cursor != '\0') {
        ++cursor;
    }
    return cursor + 1;
}
/**
 * Returns the length of the key in a "key\0value\0" blob, i.e. the number of
 * characters before the first '\0'.
 */
size_t get_key_size_from_blob(char* blob) {
    size_t key_len = 0;
    for(const char* p = blob; *p != '\0'; ++p) {
        ++key_len;
    }
    return key_len;
}
/**
 * Returns the length of the value in a "key\0value\0" blob: skips the key and
 * its terminator, then counts characters up to the next '\0'.
 */
size_t get_val_size_from_blob(char* blob) {
    const char* val_start = blob;
    while(*val_start != '\0') {
        ++val_start;
    }
    ++val_start;

    size_t val_len = 0;
    while(val_start[val_len] != '\0') {
        ++val_len;
    }
    return val_len;
}
/**
 * Returns the number of characters from key_ptr up to the first '\0'.
 */
size_t get_key_size_from_start_ptr(char* key_ptr) {
    size_t count = 0;
    while(key_ptr[count] != '\0') {
        ++count;
    }
    return count;
}
/**
 * Returns the number of characters from val_ptr up to the first '\0'.
 */
size_t get_val_size_from_start_ptr(char* val_ptr) {
    const char* end = val_ptr;
    for(; *end != '\0'; ++end) {
    }
    return static_cast<size_t>(end - val_ptr);
}
#endif
\ No newline at end of file
#ifndef __CLI_API_H__
#define __CLI_API_H__
#include <string>
#include <vector>
std::vector<std::string> tokenize(std::string raw_str);
std::string get_key(char* kv);
std::string get_val(char* kv);
std::pair<std::string, std::string> get_kv(char* kv);
char* get_key_ptr(char* blob);
char* get_val_ptr(char* blob);
size_t get_key_size_from_blob(char* blob);
size_t get_val_size_from_blob(char* blob);
size_t get_key_size_from_start_ptr(char* key_ptr);
size_t get_val_size_from_start_ptr(char* val_ptr);
#endif
\ No newline at end of file
#include <iostream>
#include <cstring>
#include <string>
#include <vector>
#include "cli_api.hpp"
using namespace std;
// Interactive check for tokenize(): reads lines from stdin, prints the raw
// line and its first three tokens, and stops on "exit"/"EXIT" or end of input.
int main() {
    // Stack buffer: nothing to free on any exit path.
    char ip[1024];
    vector<string> tokens;
    while(true) {
        memset(ip, 0, sizeof(ip));
        if(!cin.getline(ip, sizeof(ip))) {
            if(cin.eof()) {
                // Input is exhausted; without this the loop would spin forever.
                break;
            }
            // Line longer than the buffer: keep what was read and carry on.
            cin.clear();
        }
        printf("Got raw cmd: %s\n", ip);
        tokens = tokenize(string(ip));
        if(tokens.empty()) {
            // Blank line: there is no command to inspect.
            continue;
        }
        if(tokens[0].compare("exit")==0 || tokens[0].compare("EXIT")==0) {
            printf("Got exit: %s\n", tokens[0].c_str());
            break;
        }
        // Missing arguments are printed as empty strings instead of indexing
        // past the end of the token vector.
        const char* arg1 = tokens.size() > 1 ? tokens[1].c_str() : "";
        const char* arg2 = tokens.size() > 2 ? tokens[2].c_str() : "";
        printf("Command\t\tArg1\t\tArg2\n");
        printf("%s\t\t%s\t\t%s\n", tokens[0].c_str(), arg1, arg2);
    }
    return 0;
}
\ No newline at end of file
#ifndef __CLIENT_FUNCTIONS_CC__
#define __CLIENT_FUNCTIONS_CC__
#include <chrono>
#include <stddef.h>
#include "common.hpp"
namespace chrono = std::chrono;
// Placeholder for a TCP read request: not implemented yet, always reports
// success without touching its arguments.
int read_request(TCP_Transport *transport, char* key, size_t key_size) {
    (void)transport;
    (void)key;
    (void)key_size;
    return 0;
}
// Issues a one-sided RDMA read on the transport.
// Returns 0 when one_sided_read() reports success (a zero result), -1 otherwise.
int read_request(RDMA_Transport *transport) {
    return transport->one_sided_read() ? -1 : 0;
}
/**
 * Builds a WRITE packet (Common_Request header, key, '\0', value) and sends it
 * through the TCP transport.
 *
 * @param transport  TCP transport used to send the packet.
 * @param key        Key bytes (key_size of them; no terminator required).
 * @param val        Value bytes (val_size of them).
 * @return 0 on success, -1 when the packet cannot be allocated or send_data()
 *         reports an error.
 *
 * The packet is zero-filled first so header fields that are not set
 * explicitly never carry stale heap bytes onto the wire. The transport's own
 * mr/mr_size are restored afterwards, so it is not left pointing at the freed
 * packet.
 */
int write_request(TCP_Transport *transport, char* key, size_t key_size,
                  char* val, size_t val_size) {
    const size_t final_size = sizeof(struct Common_Request) + key_size + 1 + val_size;
    char* final_packet = (char*) malloc(final_size);
    if(final_packet == NULL) {
        return -1;
    }
    memset(final_packet, 0, final_size);

    Common_Request *w = (Common_Request*) final_packet;
    w->opcode = WRITE;
    w->type = TYPE_REQUEST;
    w->req.w_request.common.opcode = WRITE;
    w->service_type = MASTER_SERVICE;
    w->req.w_request.length = key_size + val_size;

    // Payload layout: key bytes, '\0' separator, value bytes.
    char* payload = final_packet + sizeof(struct Common_Request);
    memcpy(payload, key, key_size);
    payload[key_size] = '\0';
    memcpy(payload + key_size + 1, val, val_size);

    // Borrow the transport's buffer fields only for the duration of the send.
    auto saved_mr = transport->mr;
    auto saved_mr_size = transport->mr_size;
    transport->mr = final_packet;
    transport->mr_size = final_size;
    const int rc = transport->send_data() ? -1 : 0;
    transport->mr = saved_mr;
    transport->mr_size = saved_mr_size;

    free(final_packet);
    return rc;
}
/**
 * Builds a WRITE packet (Common_Request header, key, '\0', value), copies it
 * into the transport's memory region and pushes it with a one-sided RDMA
 * write.
 *
 * @return 0 on success, -1 when the packet cannot be allocated or
 *         one_sided_write() reports an error.
 *
 * The packet is zero-filled first so header fields that are not set
 * explicitly never carry stale heap bytes to the peer.
 */
int write_request(RDMA_Transport *transport, char* key, size_t key_size,
                  char* val, size_t val_size) {
    const size_t final_size = sizeof(struct Common_Request) + key_size + 1 + val_size;
    char* final_packet = (char*) malloc(final_size);
    if(final_packet == NULL) {
        return -1;
    }
    memset(final_packet, 0, final_size);

    Common_Request *w = (Common_Request*) final_packet;
    w->opcode = WRITE;
    w->type = TYPE_REQUEST;
    w->req.w_request.common.opcode = WRITE;
    w->service_type = MASTER_SERVICE;
    w->req.w_request.length = key_size + val_size;

    // Payload layout: key bytes, '\0' separator, value bytes.
    char* payload = final_packet + sizeof(struct Common_Request);
    memcpy(payload, key, key_size);
    payload[key_size] = '\0';
    memcpy(payload + key_size + 1, val, val_size);

    transport->copy_to_mr(final_packet, final_size);
    const int rc = transport->one_sided_write() ? -1 : 0;
    free(final_packet);
    return rc;
}
/**
 * Builds a WRITE packet (Common_Request header, key, '\0', value) and sends it
 * through the TCP transport. Overload for read-only key/value buffers.
 *
 * @param transport  TCP transport used to send the packet.
 * @param key        Key bytes (key_size of them; no terminator required).
 * @param val        Value bytes (val_size of them).
 * @return 0 on success, -1 when the packet cannot be allocated or send_data()
 *         reports an error.
 *
 * The packet is zero-filled first so header fields that are not set
 * explicitly never carry stale heap bytes onto the wire. The transport's own
 * mr/mr_size are restored afterwards, so it is not left pointing at the freed
 * packet.
 */
int write_request(TCP_Transport *transport, const char* key, size_t key_size,
                  const char* val, size_t val_size) {
    const size_t final_size = sizeof(struct Common_Request) + key_size + 1 + val_size;
    char* final_packet = (char*) malloc(final_size);
    if(final_packet == NULL) {
        return -1;
    }
    memset(final_packet, 0, final_size);

    Common_Request *w = (Common_Request*) final_packet;
    w->opcode = WRITE;
    w->type = TYPE_REQUEST;
    w->req.w_request.common.opcode = WRITE;
    w->service_type = MASTER_SERVICE;
    w->req.w_request.length = key_size + val_size;

    // Payload layout: key bytes, '\0' separator, value bytes.
    char* payload = final_packet + sizeof(struct Common_Request);
    memcpy(payload, key, key_size);
    payload[key_size] = '\0';
    memcpy(payload + key_size + 1, val, val_size);

    // Borrow the transport's buffer fields only for the duration of the send.
    auto saved_mr = transport->mr;
    auto saved_mr_size = transport->mr_size;
    transport->mr = final_packet;
    transport->mr_size = final_size;
    const int rc = transport->send_data() ? -1 : 0;
    transport->mr = saved_mr;
    transport->mr_size = saved_mr_size;

    free(final_packet);
    return rc;
}
/**
 * Builds a WRITE packet (Common_Request header, key, '\0', value, '\0'),
 * stamps it with the current steady-clock time, copies it into the
 * transport's memory region and pushes it with a one-sided RDMA write.
 *
 * @return 0 on success, -1 when the packet cannot be allocated or
 *         one_sided_write() reports an error.
 *
 * NOTE(review): the packet is copied to get_mr_addr() without a size check -
 * confirm the memory region is always at least final_size bytes.
 */
int write_request(RDMA_Transport *transport, const char* key, size_t key_size,
                  const char* val, size_t val_size) {
    const size_t final_size = sizeof(struct Common_Request) + key_size + 1 + val_size + 1;
    char* final_packet = (char*) malloc(final_size);
    if(final_packet == NULL) {
        // Without this check the memset below would write through NULL.
        return -1;
    }
    memset(final_packet, 0, final_size);

    Common_Request *w = (Common_Request*) final_packet;
    w->opcode = WRITE;
    w->type = TYPE_REQUEST;
    w->req.w_request.common.opcode = WRITE;
    w->service_type = MASTER_SERVICE;
    w->req.w_request.length = key_size + 1 + val_size + 1;
    w->request_start_time = chrono::steady_clock::now();

    // Payload layout: key bytes, '\0', value bytes, '\0' (the terminators
    // come from the zero fill above).
    char* payload = final_packet + sizeof(struct Common_Request);
    memcpy(payload, key, key_size);
    payload[key_size] = '\0';
    memcpy(payload + key_size + 1, val, val_size);

    memcpy(transport->get_mr_addr(), final_packet, final_size);
    const int rc = transport->one_sided_write() ? -1 : 0;
    free(final_packet);
    return rc;
}
// Placeholder for a TCP read RPC: not implemented yet, always returns 0.
int read_rpc(TCP_Transport *transport, char* key, size_t key_size) {
    (void)transport;
    (void)key;
    (void)key_size;
    return 0;
}
// Placeholder for an RDMA read RPC with a mutable key: not implemented yet,
// always returns 0.
int read_rpc(RDMA_Transport *transport, char* key, size_t key_size) {
    (void)transport;
    (void)key;
    (void)key_size;
    return 0;
}
/**
 * Reads the value stored under key.
 *
 * Sequence: place a READ request (header + key) in the RDMA memory region,
 * send the header over the transport's TCP connection, wait for the peer's
 * prep request, fetch the result with a one-sided RDMA read, then send an
 * acknowledgement over TCP.
 *
 * @param transport  RDMA transport (its TCP side channel is used as well).
 * @param key        Key bytes; must not be NULL.
 * @param key_size   Number of key bytes.
 * @return 0 on success; -1 when key is NULL, the request packet cannot be
 *         allocated, or the one-sided read fails.
 */
int read_rpc(RDMA_Transport *transport, const char* key, size_t key_size) {
    if(key == NULL) {
        return -1;
    }
    const size_t final_size = sizeof(Common_Request) + key_size + 1;
    char* final_packet = (char*) malloc(final_size);
    if(final_packet == NULL) {
        return -1;
    }
    memset(final_packet, 0, final_size);

    Common_Request *cr = (Common_Request*) final_packet;
    cr->opcode = READ;
    cr->service_type = MASTER_SERVICE;
    cr->type = TYPE_REQUEST;
    cr->req.r_request.common.opcode = READ;
    cr->req.r_request.common.service = MASTER_SERVICE;
    cr->req.r_request.keyLength = key_size;
    memcpy(final_packet+sizeof(Common_Request), key, key_size);
    memcpy(transport->get_mr_addr(), final_packet, final_size);

    // Send the initiating request.
    TCP_Transport *tcp_transport = transport->get_tcp_conn();
    tcp_transport->send_data((char*)cr, sizeof(Common_Request));

    // Wait for the peer's prep request before reading.
    Common_Request prep_request;
    memset((void*)&prep_request, 0, sizeof(prep_request));
    tcp_transport->recv_data((char*)&prep_request, sizeof(Common_Request));

    // One-sided read from the peer's memory region; remember the outcome so
    // the caller learns about a failed read.
    const int read_status = read_request(transport);

    // Zero the ack first so fields not set below are not sent uninitialised.
    // NOTE(review): the ack still carries STATUS_OK when the read failed, to
    // keep the exchange with the peer unchanged - confirm the intended
    // protocol for that case.
    Common_Request ack;
    memset((void*)&ack, 0, sizeof(ack));
    ack.opcode = READ;
    ack.service_type = MASTER_SERVICE;
    ack.type = TYPE_RESPONSE;
    ack.req.r_response.common.status = STATUS_OK;
    ack.req.r_response.length = 0;
    if(debug) {
        std::string value(transport->get_mr_addr()+sizeof(Common_Request));
        printf("value read is: %s\n", value.c_str());
    }
    tcp_transport->send_data((char*)&ack, sizeof(ack));

    // The request packet is no longer needed; the original never released it.
    free(final_packet);
    return read_status;
}
// Placeholder for a TCP read RPC taking a std::string key: not implemented
// yet, always returns 0.
int read_rpc(TCP_Transport *transport, std::string key, size_t key_size) {
    (void)transport;
    (void)key;
    (void)key_size;
    return 0;
}
// Placeholder for an RDMA read RPC taking a std::string key: not implemented
// yet, always returns 0.
int read_rpc(RDMA_Transport *transport, std::string key, size_t key_size) {
    (void)transport;
    (void)key;
    (void)key_size;
    return 0;
}
/**
 * Sends a WRITE request over TCP and waits for the acknowledgement.
 *
 * @return 0 when the request was sent and the ack's opcode field equals
 *         STATUS_OK; -1 when sending fails or the ack reports anything else.
 */
int write_rpc(TCP_Transport *transport, char* key, size_t key_size,
              char* val, size_t val_size) {
    // If the request never left, no ack will arrive: fail right away instead
    // of blocking in recv_data().
    if(write_request(transport, key, key_size, val, val_size)) {
        return -1;
    }
    // The ack is judged by its opcode field, compared against STATUS_OK.
    Common_Request ack;
    memset((void*)&ack, 0, sizeof(Common_Request));
    transport->recv_data((char*)&ack, sizeof(ack));
    if(ack.opcode != STATUS_OK) {
        return -1;
    }
    return 0;
}
/**
 * Writes key/value with a one-sided RDMA write, then notifies the peer with a
 * WRITE header over the transport's TCP connection and waits for the ack.
 *
 * @return 0 when the data was written and the ack's opcode field equals
 *         STATUS_OK; -1 when the RDMA write fails or the ack reports anything
 *         else.
 */
int write_rpc(RDMA_Transport *transport, char* key, size_t key_size,
              char* val, size_t val_size) {
    Common_Request cr;
    Common_Request ack;
    TCP_Transport *tcp_transport = transport->get_tcp_conn();
    memset((void*)&cr, 0, sizeof(cr));
    cr.opcode = WRITE;
    // Mark the header as a request, as the const char* overload does.
    cr.type = TYPE_REQUEST;
    cr.req.w_request.common.opcode = WRITE;
    cr.req.w_request.length = 0;
    cr.service_type = MASTER_SERVICE;
    cr.req.w_request.common.service = MASTER_SERVICE;
    // If the payload could not be written there is nothing to announce and no
    // ack to wait for.
    if(write_request(transport, key, key_size, val, val_size)) {
        return -1;
    }
    // set_mr does a malloc, so the header is sent directly instead.
    tcp_transport->send_data((char*)&cr, sizeof(cr));
    // The ack is judged by its opcode field, compared against STATUS_OK.
    memset((void*)&ack, 0, sizeof(ack));
    tcp_transport->recv_data((char*)&ack, sizeof(ack));
    if(ack.opcode != STATUS_OK) {
        return -1;
    }
    return 0;
}
/**
 * Sends a WRITE request over TCP and waits for the acknowledgement.
 * Overload for read-only key/value buffers.
 *
 * @return 0 when the request was sent and the ack's opcode field equals
 *         STATUS_OK; -1 when sending fails or the ack reports anything else.
 */
int write_rpc(TCP_Transport *transport, const char* key, size_t key_size,
              const char* val, size_t val_size) {
    // If the request never left, no ack will arrive: fail right away instead
    // of blocking in recv_data().
    if(write_request(transport, key, key_size, val, val_size)) {
        return -1;
    }
    // The ack is judged by its opcode field, compared against STATUS_OK.
    Common_Request ack;
    memset((void*)&ack, 0, sizeof(Common_Request));
    transport->recv_data((char*)&ack, sizeof(ack));
    if(ack.opcode != STATUS_OK) {
        return -1;
    }
    return 0;
}
/**
 * Writes key/value with a one-sided RDMA write, then notifies the peer with a
 * WRITE header over the transport's TCP connection and waits for the ack.
 * When `analyze` is set, the elapsed time from just before the write until
 * the ack is appended to client_rtt_time.
 *
 * @return 0 when the data was written and the ack's opcode field equals
 *         STATUS_OK; -1 when the RDMA write fails or the ack reports anything
 *         else.
 */
int write_rpc(RDMA_Transport *transport, const char* key, size_t key_size,
              const char* val, size_t val_size) {
    Common_Request cr;
    Common_Request ack;
    TCP_Transport *tcp_transport = transport->get_tcp_conn();
    memset((void*)&cr, 0, sizeof(cr));
    cr.opcode = WRITE;
    cr.type = TYPE_REQUEST;
    cr.req.w_request.common.opcode = WRITE;
    cr.req.w_request.length = 0;
    cr.service_type = MASTER_SERVICE;
    cr.req.w_request.common.service = MASTER_SERVICE;

    const auto start_time = chrono::steady_clock::now();
    // If the payload could not be written there is nothing to announce and no
    // ack to wait for.
    if(write_request(transport, key, key_size, val, val_size)) {
        return -1;
    }
    // set_mr does a malloc, so the header is sent directly instead.
    tcp_transport->send_data((char*)&cr, sizeof(cr));
    // The ack is judged by its opcode field, compared against STATUS_OK.
    memset((void*)&ack, 0, sizeof(ack));
    tcp_transport->recv_data((char*)&ack, sizeof(ack));
    if(ack.opcode != STATUS_OK) {
        return -1;
    }
    if(analyze) {
        const auto end_time = chrono::steady_clock::now();
        client_rtt_time.push_back(end_time - start_time);
    }
    return 0;
}
/**
 * First half of a split write RPC: writes key/value with a one-sided RDMA
 * write and announces it with a WRITE header (stamped with the current
 * steady-clock time) over the transport's TCP connection. The ack is read
 * separately, see write_get_response().
 *
 * @return 0 when the payload was written and the header sent; -1 when the
 *         RDMA write fails (no header is sent in that case).
 */
int write_send_request(RDMA_Transport *transport, const char* key, size_t key_size,
                       const char* val, size_t val_size) {
    Common_Request cr;
    TCP_Transport *tcp_transport = transport->get_tcp_conn();
    memset((void*)&cr, 0, sizeof(cr));
    cr.opcode = WRITE;
    cr.type = TYPE_REQUEST;
    cr.req.w_request.common.opcode = WRITE;
    cr.req.w_request.length = 0;
    cr.service_type = MASTER_SERVICE;
    cr.req.w_request.common.service = MASTER_SERVICE;
    cr.request_start_time = chrono::steady_clock::now();
    // Do not announce a write that did not happen; report the failure so the
    // caller does not wait for a response.
    if(write_request(transport, key, key_size, val, val_size)) {
        return -1;
    }
    // set_mr does a malloc, so the header is sent directly instead.
    tcp_transport->send_data((char*)&cr, sizeof(cr));
    return 0;
}
/**
 * Second half of a split write RPC: receives the ack over the transport's TCP
 * connection. Returns -1 when the ack's opcode field differs from STATUS_OK,
 * otherwise 0. When `analyze` is set, the time between the ack's
 * request_start_time and now is appended to client_rtt_time.
 */
int write_get_response(RDMA_Transport *transport) {
    auto finished_at = chrono::steady_clock::now();
    Common_Request response;
    TCP_Transport *tcp = transport->get_tcp_conn();

    memset((void*)&response, 0, sizeof(response));
    tcp->recv_data((char*)&response, sizeof(response));
    if(response.opcode == STATUS_OK) {
        if(analyze) {
            finished_at = chrono::steady_clock::now();
            client_rtt_time.push_back(finished_at - response.request_start_time);
        }
        return 0;
    }
    return -1;
}
#endif
\ No newline at end of file
#ifndef __CLIENT_FUNCTIONS_H__
#define __CLIENT_FUNCTIONS_H__
#include <stddef.h>
#include "../transport_api/transport_config.hpp"
// int read_request(TCP_Transport *transport, char* key, size_t key_size);
// int read_request(RDMA_Transport *transport, char* key, size_t key_size);
// int read_request(TCP_Transport *transport, const char* key, size_t key_size);
// int read_request(RDMA_Transport *transport, const char* key, size_t key_size);
int read_request(RDMA_Transport* transport);
int write_request(TCP_Transport *transport, char* key, size_t key_size,
char* val, size_t val_size);
int write_request(RDMA_Transport *transport, char* key, size_t key_size,
char* val, size_t val_size);
int write_request(TCP_Transport *transport, const char* key, size_t key_size,
const char* val, size_t val_size);
int write_request(RDMA_Transport *transport, const char* key, size_t key_size,
const char* val, size_t val_size);
int read_rpc(TCP_Transport *transport, char* key, size_t key_size);
int read_rpc(RDMA_Transport *transport, char* key, size_t key_size);
int read_rpc(RDMA_Transport *transport, const char* key, size_t key_size);
int read_rpc(TCP_Transport *transport, std::string key, size_t key_size);
int read_rpc(RDMA_Transport *transport, std::string key, size_t key_size);
int write_rpc(TCP_Transport *transport, char* key, size_t key_size,
char* val, size_t val_size);
int write_rpc(RDMA_Transport *transport, char* key, size_t key_size,
char* val, size_t val_size);
int write_rpc(TCP_Transport *transport, const char* key, size_t key_size,
const char* val, size_t val_size);
int write_rpc(RDMA_Transport *transport, const char* key, size_t key_size,
const char* val, size_t val_size);
int write_send_request(RDMA_Transport *transport, const char* key, size_t key_size,
const char* val, size_t val_size);
int write_get_response(RDMA_Transport *transport);
#endif
\ No newline at end of file
#ifndef __COMMON_CC__
#define __COMMON_CC__
#include <atomic>
#include <chrono>
#include <string>
#include <unordered_map>
#include "Buffer.hpp"
#include "../transport_api/transport_config.hpp"
#include "threadsafe_queue.hpp"
#include "log.hpp"
#include "common.hpp"
namespace chrono = std::chrono;
// TIME MEASUREMENT STRUCTURES
// One vector of samples per measured stage. chrono::duration<double> uses the
// default period, so every sample is a number of seconds.
std::vector<chrono::duration<double>> request_queue_wait_time;
std::vector<chrono::duration<double>> response_buffer_wait_time;
std::vector<chrono::duration<double>> send_queue_wait_time;
std::vector<chrono::duration<double>> client_rtt_time;
std::vector<chrono::duration<double>> worker_read_service_time;
std::vector<chrono::duration<double>> worker_write_service_time;
std::vector<chrono::duration<double>> append_log_time;
std::vector<chrono::duration<double>> master_backup_ack_time;
std::vector<chrono::duration<double>> rdma_one_sided_write_time;
std::vector<chrono::duration<double>> rdma_one_sided_read_time;
// Running totals matching the sample vectors above, all starting at zero.
double request_queue_wait_time_sum = 0.0;
double response_buffer_wait_time_sum = 0.0;
double send_queue_wait_time_sum = 0.0;
double client_rtt_time_sum = 0.0;
double worker_read_service_time_sum = 0.0;
double worker_write_service_time_sum = 0.0;
double append_log_time_sum = 0.0;
double master_backup_ack_time_sum = 0.0;
double rdma_one_sided_write_time_sum = 0.0;
double rdma_one_sided_read_time_sum = 0.0;
//
// LOG STRUCTURE DECLARATION
Log storage_log;
std::atomic<int> req_cnt(0); // this is till implementation of clean exit is complete
int max_req = 500;
// Runtime switches; both are off by default.
bool debug = false;
bool analyze = false;
int max_packet_size_bytes = 0;
chrono::duration<double> max_cq_poll_timeout(0.0);
chrono::duration<double> response_buffer_add_timeout(0.05); // 0.05 s = 50 ms, static for now (300 for testing)
chrono::duration<double> worker_response_buffer_poll_timeout(0.05); // 0.05 s = 50 ms, static for now (300 for testing)
// Definitions of the connection maps declared in namespace common.
std::unordered_map<uint32_t, RDMA_Transport*> common::rdma_transport_map;
std::unordered_map<uint32_t, TCP_Transport*> common::tcp_transport_map;
// Process-wide queues and the response buffer. They are heap-allocated during
// static initialisation and nothing in this file deletes them.
Thread_Safe_Queue *job_queue = new Thread_Safe_Queue();
ThreadSafe_Queue<struct job_context*> *send_queue = new ThreadSafe_Queue<struct job_context*>();
ThreadSafe_Queue<struct job_context*> *request_queue = new ThreadSafe_Queue<struct job_context*>();
Buffer *response_buffer = new Buffer(response_buffer_size);
// Not given an initialiser here, so both start at zero (static storage).
int num_servers;
int num_replicas;
#endif
\ No newline at end of file
#ifndef __COMMON_H__
#define __COMMON_H__
#include <atomic>
#include <unordered_map>
#include <chrono>
#include <byteswap.h>
#include <stdint.h>
#include "Buffer.hpp"
#include "threadsafe_queue.hpp"
#include "general_threadsafe_queue.hpp"
#include "../transport_api/transport_config.hpp"
#include "log.hpp"
// 64-bit host <-> network (big-endian) byte-order conversion.
//
// The host byte order is taken from the compiler's predefined
// __BYTE_ORDER__ macros first, falling back to the <endian.h> style
// __BYTE_ORDER macros. Every macro is checked with defined() before it is
// compared: two undefined macros would otherwise both evaluate to 0 and
// silently select the little-endian branch.
#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || \
    (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
     __BYTE_ORDER == __LITTLE_ENDIAN)
// Little-endian host: network order is the byte-swapped value.
static inline uint64_t htonll(uint64_t x) {return bswap_64(x);}
static inline uint64_t ntohll(uint64_t x) {return bswap_64(x);}
#elif (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
       __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || \
      (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
       __BYTE_ORDER == __BIG_ENDIAN)
// Big-endian host: already in network order.
static inline uint64_t htonll(uint64_t x) {return x;}
static inline uint64_t ntohll(uint64_t x) {return x;}
#else
#error Unable to determine host byte order: neither little-endian nor big-endian was detected
#endif
namespace chrono = std::chrono;
// Kinds of participant: NIC, SERVER or CLIENT.
// NOTE(review): presumably identifies which role the current process plays -
// confirm against the code that sets self_id.
enum Entity {
NIC,
SERVER,
CLIENT,
};
// Declared here only; the definition lives in another translation unit.
extern enum Entity self_id;
// const bool debug = true;
// const chrono::duration<double> max_poll_cq_timeout(0.005); //5 ms
// const int MAX_PACKET_SIZE = ( 1<<21 );
// TIME MEASUREMENT STRUCTURES
extern std::vector<chrono::duration<double>> request_queue_wait_time;
extern std::vector<chrono::duration<double>> response_buffer_wait_time;
extern std::vector<chrono::duration<double>> send_queue_wait_time;
extern std::vector<chrono::duration<double>> client_rtt_time;
extern std::vector<chrono::duration<double>> worker_read_service_time;
extern std::vector<chrono::duration<double>> worker_write_service_time;
extern std::vector<chrono::duration<double>> append_log_time;
extern std::vector<chrono::duration<double>> master_backup_ack_time;
extern std::vector<chrono::duration<double>> rdma_one_sided_write_time;
extern std::vector<chrono::duration<double>> rdma_one_sided_read_time;
extern double request_queue_wait_time_sum;
extern double response_buffer_wait_time_sum;
extern double send_queue_wait_time_sum;
extern double client_rtt_time_sum;
extern double worker_read_service_time_sum;
extern double worker_write_service_time_sum;
extern double append_log_time_sum;
extern double master_backup_ack_time_sum;
extern double rdma_one_sided_write_time_sum;
extern double rdma_one_sided_read_time_sum;
//
// LOG STRUCTURE DECLARATION
extern struct Log storage_log;
extern std::atomic<int> req_cnt; // this is till implementation of clean exit is complete
extern int max_req;
const size_t max_ip_cmd_len = 1024; // for batch mode
// Number of slots in the global response buffer (see response_buffer below).
const size_t response_buffer_size = 100;
// Runtime switches for debug output and timing collection.
extern bool debug;
extern bool analyze;
// Timeouts are chrono::duration<double>, i.e. expressed in seconds.
extern chrono::duration<double> max_cq_poll_timeout;
extern chrono::duration<double> response_buffer_add_timeout;
extern chrono::duration<double> worker_response_buffer_poll_timeout;
extern int max_packet_size_bytes;
// Process-wide queues and response buffer; defined in common.cc.
extern class Thread_Safe_Queue *job_queue;
extern class ThreadSafe_Queue<struct job_context*> *send_queue;
extern class ThreadSafe_Queue<struct job_context*> *request_queue;
extern class Buffer *response_buffer;
extern int num_servers;
extern int num_replicas;
// These are the connection objects, keyed by a 32-bit id.
// This will do for testing and benchmarking.
// NOTE(review): what the uint32_t key represents is not visible here - confirm
// against the code that fills the maps.
namespace common {
extern std::unordered_map<uint32_t, RDMA_Transport*> rdma_transport_map;
extern std::unordered_map<uint32_t, TCP_Transport*> tcp_transport_map;
}
// Just for convenience: maps a machine name to its IPv4 address. Each
// "<name>-nic" entry sits next to its "<name>" entry.
// static so it doesn't give segfault before main. Because this is a header,
// `static` also gives every translation unit that includes it a private copy
// of the map.
static std::unordered_map<std::string, std::string> machine_allocation_ips = {
{"ub-02", "192.168.200.30"},
{"ub-02-nic", "192.168.200.31"},
{"ub-04", "192.168.200.50"},
{"ub-04-nic", "192.168.200.51"},
{"ub-05", "192.168.200.20"},
{"ub-05-nic", "192.168.200.21"},
{"ub-08", "192.168.200.40"},
{"ub-08-nic", "192.168.200.41"}
};
/**
* This enum provides symbolic names for the status values returned
* to applications by RAMCloud operations.
*
* 0 means success; anything else means that an error occurred.
* Not all status values can be returned by all operations.
*/
typedef enum Status {
/// Default return value when an operation was successful.
STATUS_OK = 0,
/// Indicates that the server does not know about (and is not responsible
/// for) a given tablet, but that it may exist elsewhere in the system.
/// When it's possible that the tablet exists on another server, this
/// status should be returned (in preference to the definitive
/// TABLE_DOESNT_EXIST).
STATUS_UNKNOWN_TABLET = 1,
/// Indicates that a table does not exist anywhere in the system. At present
/// only the coordinator can say with certainly that a table does not exist.
STATUS_TABLE_DOESNT_EXIST = 2,
/// Indicates that an object does not exist anywhere in the system. Note
/// that unlike with tables there is no UNKNOWN_OBJECT status. This is just
/// because servers will reject operations on objects in unknown tables with
/// a table-related status. If they own a particular tablet, then they can
/// say with certainty if an object exists there or not.
STATUS_OBJECT_DOESNT_EXIST = 3,
STATUS_OBJECT_EXISTS = 4,
STATUS_WRONG_VERSION = 5,
STATUS_NO_TABLE_SPACE = 6,
STATUS_MESSAGE_TOO_SHORT = 7,
STATUS_UNIMPLEMENTED_REQUEST = 8,
STATUS_REQUEST_FORMAT_ERROR = 9,
STATUS_RESPONSE_FORMAT_ERROR = 10,
STATUS_COULDNT_CONNECT = 11,
STATUS_BACKUP_BAD_SEGMENT_ID = 12,
/// Returned by backups when they cannot (or do not wish to) allocate
/// space for a segment replica.
STATUS_BACKUP_OPEN_REJECTED = 13,
STATUS_BACKUP_SEGMENT_OVERFLOW = 14,
STATUS_BACKUP_MALFORMED_SEGMENT = 15,
STATUS_SEGMENT_RECOVERY_FAILED = 16,
/// Indicates that a server is not prepared to handle a request at
/// the present time; the caller should retry at a later time. This
/// status can be returned under many different situations, such as
/// (a) the server is out of resources to execute the request, or
/// (b) the server is not sure it actually has authority to execute
/// the request, and is checking with the coordinator.
STATUS_RETRY = 17,
/// Indicates that the RPC requested an unknown service.
STATUS_SERVICE_NOT_AVAILABLE = 18,
STATUS_TIMEOUT = 19,
/// Indicates that server to which an RPC is directed either never existed,
/// has come and gone, or is currently in crashed state. The server is not
/// in a position to respond to RPCs and probably never will be again
/// (unless the id hasn't yet existed; once a server crashes its id will
/// never be reused).
STATUS_SERVER_NOT_UP = 20,
STATUS_INTERNAL_ERROR = 21,
/// Indicates that the object chosen for an operation does not match the
/// associated requirements. Therefore the chosen object is invalid.
STATUS_INVALID_OBJECT = 22,
/// Indicates that a tablet does not exist. This status is of relevance
/// when doing split or merge operations on tablets are executed.
STATUS_TABLET_DOESNT_EXIST = 23,
/// Indicates that the logic to partition tablets was invoked without a
/// preceding invocation to start reading replicas off of disk.
STATUS_PARTITION_BEFORE_READ = 24,
/// Indicates that an RPC was intended for a particular server id, but
/// was actually sent to a different server id.
STATUS_WRONG_SERVER = 25,
/// Indicates that the server sending an RPC is not present in the
/// server list of the RPC recipient. Used to help servers discover
/// that they are zombies (the rest of the cluster thinks a zombie
/// is dead, but the zombie thinks it's still alive), so they don't
/// continue servicing requests when other servers have already
/// taken over their tablets. See "Zombies" in designNotes.
STATUS_CALLER_NOT_IN_CLUSTER = 26,
/// Indicates that a single request was too big to fit in an rpc and
/// thus could not be sent/carried out.
STATUS_REQUEST_TOO_LARGE = 27,
/// Indicates that the server does not know about (and is not responsible
/// for) a given indexlet, but that it may exist elsewhere in the system.
/// When it's possible that the indexlet exists on another server, this
/// status should be returned (in preference to the definitive
/// INDEX_DOESNT_EXIST).
STATUS_UNKNOWN_INDEXLET = 28,
/// Indicates that an index does not exist anywhere in the system. At
/// present only the coordinator can say with certainty that an index does
/// not exist.
STATUS_INDEX_DOESNT_EXIST = 29,
/// Indicates that a parameter provided by the client is invalid (for
/// example: it is outside allowed bounds).
STATUS_INVALID_PARAMETER = 30,
/// Indicates that client already received the result of the rpc.
/// It does not make sense to execute the RPC again. Most likely cause
/// is a delayed network packet.
STATUS_STALE_RPC = 31,
/// Indicates that the lease of a client is expired on the coordinator.
/// Master refused to execute the RPC with expired lease.
STATUS_EXPIRED_LEASE = 32,
/// Indicates that a client tried to perform transaction operations after
/// the transaction commit had already started.
STATUS_TX_OP_AFTER_COMMIT = 33,
STATUS_MAX_VALUE = 33,
// Note: if you add a new status value you must make the following
// additional updates:
// * Modify STATUS_MAX_VALUE to have a value equal to the largest
// defined status value, and make sure its definition is the last one
// in the list. STATUS_MAX_VALUE is used primarily for testing.
// * Add new entries in the tables "messages" and "symbols" in Status.cc.
// * Add a new exception class to ClientException.h
// * Add a new "case" to ClientException::throwException to map from
// the status value to a status-specific ClientException subclass.
// * In the Java bindings, add a static class for the exception to
// ClientException.java
// * Add a case for the status of the exception to throw the exception in
// ClientException.java
// * Add the exception to the Status enum in Status.java, making
// sure the status is in the correct position corresponding to its status
// code.
} Status;
/**
* Used in conditional operations to specify conditions under
* which an operation should be aborted with an error.
*
* RejectRules are typically used to ensure consistency of updates;
* for example, we might want to update a value but only if it hasn't
* changed since the last time we read it. If a RejectRules object
* is passed to an operation, the operation will be aborted if any
* of the following conditions are satisfied:
* - doesntExist is nonzero and the object does not exist
* - exists is nonzero and the object does exist
* - versionLeGiven is nonzero and the object exists with a version
* less than or equal to givenVersion.
* - versionNeGiven is nonzero and the object exists with a version
* different from givenVersion.
*/
struct RejectRules {
// Version that versionLeGiven / versionNeGiven compare the object's version against.
uint64_t givenVersion;
// Nonzero: abort the operation if the object does not exist.
uint8_t doesntExist;
// Nonzero: abort the operation if the object does exist.
uint8_t exists;
// Nonzero: abort if the object exists with a version <= givenVersion.
uint8_t versionLeGiven;
// Nonzero: abort if the object exists with a version != givenVersion.
uint8_t versionNeGiven;
// packed: the struct is embedded in the packed request headers below.
} __attribute__((packed));
/**
* Selects the particular service that will handle a given rpc.
* A rpc may only be sent to one particular service; see ServiceMask for
* situations dealing with sets of services on a particular Server.
*/
// No explicit values are given, so the enumerators are numbered 0..4 in
// declaration order; INVALID_SERVICE therefore doubles as the service count.
enum ServiceType {
MASTER_SERVICE,
BACKUP_SERVICE,
COORDINATOR_SERVICE,
ADMIN_SERVICE,
INVALID_SERVICE, // One higher than the max.
};
/**
* Used in linearizable RPCs to check whether or not the RPC can be processed.
*/
// Three 64-bit fields, packed; embedded by value in Write::Request below.
struct ClientLease {
uint64_t leaseId; /// A cluster unique id for a specific lease.
/// 0 is used to indicate invalid or expired id.
uint64_t leaseExpiration; /// Cluster time after which the lease may have
/// become invalid.
uint64_t timestamp; /// Cluster time when this lease information was
/// provided by the coordinator.
} __attribute__((packed));
/**
* This enum defines the choices for the "opcode" field in RPC
* headers, which selects a particular operation to perform. Each
* RAMCloud service implements a subset of these operations. If you
* change this table you must also reflect the changes in the following
* locations:
* - The method opcodeSymbol in WireFormat.cc.
* - WireFormatTest.cc's out-of-range test, if ILLEGAL_RPC_TYPE was changed.
* - You may need to modify the "callees" table in scripts/genLevels.py,
* which keeps track of which RPCs invoke which other RPCs.
*/
// Values are assigned explicitly, start at 7, and are not contiguous
// (e.g. 27, 30, 34, 37, 38, 41 and 54 are unused in this table).
enum Opcode {
PING = 7,
PROXY_PING = 8,
KILL = 9,
CREATE_TABLE = 10,
GET_TABLE_ID = 11,
DROP_TABLE = 12,
READ = 13,
WRITE = 14,
REMOVE = 15,
ENLIST_SERVER = 16,
GET_SERVER_LIST = 17,
GET_TABLE_CONFIG = 18,
RECOVER = 19,
HINT_SERVER_CRASHED = 20,
RECOVERY_MASTER_FINISHED = 21,
ENUMERATE = 22,
SET_MASTER_RECOVERY_INFO = 23,
FILL_WITH_TEST_DATA = 24,
MULTI_OP = 25,
GET_METRICS = 26,
BACKUP_FREE = 28,
BACKUP_GETRECOVERYDATA = 29,
BACKUP_STARTREADINGDATA = 31,
BACKUP_WRITE = 32,
BACKUP_RECOVERYCOMPLETE = 33,
UPDATE_SERVER_LIST = 35,
BACKUP_STARTPARTITION = 36,
DROP_TABLET_OWNERSHIP = 39,
TAKE_TABLET_OWNERSHIP = 40,
GET_HEAD_OF_LOG = 42,
INCREMENT = 43,
PREP_FOR_MIGRATION = 44,
RECEIVE_MIGRATION_DATA = 45,
REASSIGN_TABLET_OWNERSHIP = 46,
MIGRATE_TABLET = 47,
IS_REPLICA_NEEDED = 48,
SPLIT_TABLET = 49,
GET_SERVER_STATISTICS = 50,
SET_RUNTIME_OPTION = 51,
GET_SERVER_CONFIG = 52,
GET_BACKUP_CONFIG = 53,
GET_MASTER_CONFIG = 55,
GET_LOG_METRICS = 56,
VERIFY_MEMBERSHIP = 57,
GET_RUNTIME_OPTION = 58,
GET_LEASE_INFO = 59,
RENEW_LEASE = 60,
SERVER_CONTROL = 61,
SERVER_CONTROL_ALL = 62,
GET_SERVER_ID = 63,
READ_KEYS_AND_VALUE = 64,
LOOKUP_INDEX_KEYS = 65,
READ_HASHES = 66,
INSERT_INDEX_ENTRY = 67,
REMOVE_INDEX_ENTRY = 68,
CREATE_INDEX = 69,
DROP_INDEX = 70,
DROP_INDEXLET_OWNERSHIP = 71,
TAKE_INDEXLET_OWNERSHIP = 72,
PREP_FOR_INDEXLET_MIGRATION = 73,
SPLIT_AND_MIGRATE_INDEXLET = 74,
COORD_SPLIT_AND_MIGRATE_INDEXLET = 75,
TX_DECISION = 76,
TX_PREPARE = 77,
TX_REQUEST_ABORT = 78,
TX_HINT_FAILED = 79,
ECHO = 80,
ILLEGAL_RPC_TYPE = 81, // 1 + the highest legitimate Opcode
// Local additions to the table: these two values are not operations.
// They are stored in Common_Request::type to mark a packet as a request
// or a response. Note that they are numerically larger than
// ILLEGAL_RPC_TYPE, so ILLEGAL_RPC_TYPE is no longer the largest
// enumerator in this enum.
TYPE_REQUEST = 82, // Request distinguisher
TYPE_RESPONSE = 83, // Response distinguisher
};
/**
* Each RPC request starts with this structure.
*/
// Packed 4-byte prefix (two uint16_t fields) placed first in every
// Request struct in this header.
struct RequestCommon {
uint16_t opcode; /// Opcode of operation to be performed.
uint16_t service; /// ServiceType to invoke for this rpc.
} __attribute__((packed));
/**
* Each RPC response starts with this structure.
*/
// Prefix placed first in every Response struct in this header.
// NOTE(review): the field is the Status enum itself, so its wire size is
// whatever the compiler picks for that enum's underlying type — confirm
// both ends of the connection agree.
struct ResponseCommon {
Status status; // Indicates whether the operation
// succeeded; if not, it explains why.
} __attribute__((packed));
/**
 * Wire format of the WRITE RPC: the opcode/service constants plus the packed
 * Request and Response headers.
 *
 * NOTE(review): an earlier version of this comment described an added
 * "uint64_t timestamp" field for latency measurement and an #ifdef/#else
 * structure selecting the packet size at compile time (default 100 bytes).
 * Neither is present in the struct as defined here — confirm whether that
 * description is stale or refers to code elsewhere.
 */
struct Write {
static const Opcode opcode = WRITE;
static const ServiceType service = MASTER_SERVICE;
// Fixed-size request header; the keysAndValue blob follows it on the wire.
struct Request {
RequestCommon common;
uint64_t tableId;
ClientLease lease;
uint64_t rpcId;
uint64_t ackId;
uint32_t length; // Includes the total size of the
// keysAndValue blob in bytes.These
// follow immediately after this header
RejectRules rejectRules;
uint8_t async;
} __attribute__((packed));
// Fixed-size response header.
struct Response {
ResponseCommon common;
uint64_t version;
} __attribute__((packed));
};
/**
 * Wire format of the READ RPC: the opcode/service constants plus the packed
 * Request and Response headers. Unlike Write::Request, the request header
 * defined here carries no tableId, lease or rpc id fields.
 */
struct Read {
static const Opcode opcode = READ;
static const ServiceType service = MASTER_SERVICE;
// Fixed-size request header; the key bytes follow it on the wire.
struct Request {
RequestCommon common;
uint16_t keyLength; // Length of the key in bytes.
// The actual key follows
// immediately after this header.
RejectRules rejectRules;
} __attribute__((packed));
// Fixed-size response header; the value bytes follow it on the wire.
struct Response {
ResponseCommon common;
uint64_t version;
uint32_t length; // Length of the object's value in bytes.
// The actual bytes of the object follow
// immediately after this header.
} __attribute__((packed));
};
////// A common struct for request //////
// Overlay of the four Read/Write header layouts. Despite the name it holds
// response headers as well as request headers; the union itself does not
// record which member is active.
union request {
struct Write::Request w_request;
struct Write::Response w_response;
struct Read::Request r_request;
struct Read::Response r_response;
} __attribute__((packed));
// Envelope placed in front of every Read/Write header in this project.
// NOTE(review): opcode and service_type are 8-bit here while RequestCommon
// uses 16-bit fields for the same information — all Opcode values in this
// file fit in 8 bits, but confirm that stays true.
struct Common_Request {
uint8_t opcode;
uint8_t service_type;
// TYPE_REQUEST or TYPE_RESPONSE (see enum Opcode).
uint8_t type;
// NOTE(review): a steady_clock time_point is only meaningful on the machine
// that produced it, and it is a class type inside a packed struct — confirm
// both are intended for a structure that is sent over the network.
chrono::time_point<chrono::steady_clock> request_start_time;
union request req;
//key and value pair immediately follow
} __attribute__((packed));
#endif
#ifndef __CONNECTION_POOL_C__
#define __CONNECTION_POOL_C__
#include <arpa/inet.h>
#include <sys/ioctl.h>
#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>
#include "common.hpp"
#include "../config/read_config.hpp"
#include "connection_pool.hpp"
#include "../transport_api/transport_config.hpp"
// Default constructor: no listening transport, "unset" (-1) port and socket
// markers, and an empty descriptor set. transport_type is left untouched.
Connection_Pool::Connection_Pool() {
    common_tcp_conn = NULL;
    common_socket_local_fd = -1;
    common_port = -1;
    max_fd = 0;
    FD_ZERO(&cset);
}
// Delegates to the default constructor, then records which transport kind
// this pool will create connections for.
Connection_Pool::Connection_Pool(enum Transport_Type t)
    : Connection_Pool()
{
    transport_type = t;
}
// Connection_Pool::~Connection_Pool() {
// }
int Connection_Pool::set_common_tcp_conn(int port) {
if(this->common_tcp_conn == NULL) {
this->common_tcp_conn = new TCP_Transport(port);
}
this->common_port = port;
if(this->common_tcp_conn->make_socket()) {
//error
perror("set_common_tcp_conn: Unable to create socket");
return -1;
}
FD_SET(this->common_tcp_conn->get_local_fd(), &(this->cset));
this->max_fd = std::max(this->max_fd, this->common_tcp_conn->get_local_fd());
return 0;
}
// Puts the shared TCP transport into listening mode.
// Returns 0 after start_listen() has been invoked, or -1 if the shared
// transport has not been created yet (see set_common_tcp_conn()).
// The original fell off the end of this int function without returning a
// value, which is undefined behaviour.
// NOTE(review): start_listen()'s own result (if it has one) is not visible
// from here and is therefore not propagated — confirm whether it should be.
int Connection_Pool::start_common_conn_listen() {
    if (this->common_tcp_conn == NULL) {
        fprintf(stderr, "start_common_conn_listen: Common tcp obj not created\n");
        return -1;
    }
    this->common_tcp_conn->start_listen();
    return 0;
}
// Accepts one pending connection on the shared listening socket and wraps it
// in a transport of the pool's configured kind.
//
// parameters: RDMA tuning values copied into the new RDMA transport's config;
//             only read when transport_type is RDMA_RC_TRANSPORT.
// Returns 0 on success, -1 if the shared transport is missing, the RDMA
// parameters are missing, or accept() fails.
//
// Changes from the original: the TCP_Transport is now allocated only after
// accept() succeeds (it used to leak on every failed accept), and a NULL
// `parameters` is rejected up front instead of being dereferenced.
int Connection_Pool::accept_conn(Params *parameters) {
    if (this->common_tcp_conn == NULL) {
        //error
        perror("accept_conn: Common tcp obj not created");
        return -1;
    }
    if (this->transport_type == RDMA_RC_TRANSPORT && parameters == NULL) {
        // Checked before accept() so no descriptor is left dangling.
        fprintf(stderr, "accept_conn: RDMA parameters not provided\n");
        return -1;
    }

    const int listen_fd = this->common_tcp_conn->get_local_fd();
    const int conn_fd = accept(listen_fd, NULL, NULL);
    if (conn_fd < 0) {
        //error
        perror("accept_conn: Unable to accept connection");
        return -1;
    }

    TCP_Transport *tcp_transport = new TCP_Transport();
    tcp_transport->set_local_fd(listen_fd);
    tcp_transport->set_conn_fd(conn_fd);

    // Watch the new connection in subsequent select() calls.
    this->max_fd = std::max(this->max_fd, conn_fd);
    FD_SET(conn_fd, &(this->cset));

    switch (this->transport_type) {
        case TCP_IP_TRANSPORT:
            this->tcp_connection_pool.push_back(tcp_transport);
            this->tcp_transport_map[conn_fd] = tcp_transport;
            // Global registry is keyed by insertion index.
            common::tcp_transport_map[common::tcp_transport_map.size()] = tcp_transport;
            break;
        case RDMA_RC_TRANSPORT: {
            // The RDMA transport is layered on the already-accepted TCP connection.
            RDMA_Transport *rdma_transport = new RDMA_Transport(tcp_transport);
            rdma_transport->rdma_config->mr.mr_size = parameters->rdma_mr_size_bytes;
            rdma_transport->rdma_config->mtu = parameters->rdma_mtu_size_bytes;
            rdma_transport->rdma_config->min_rnr_timer = parameters->rdma_min_rnr_timer;
            rdma_transport->rdma_config->timeout = parameters->rdma_timeout;
            rdma_transport->rdma_config->retry_cnt = parameters->rdma_retry_cnt;
            rdma_transport->rdma_config->ib_port = parameters->rdma_ib_port;
            rdma_transport->rdma_config->gid_idx = parameters->rdma_gid_idx;
            rdma_transport->rdma_setup_no_tcp_setup();
            this->rdma_connection_pool.push_back(rdma_transport);
            this->rdma_transport_map[conn_fd] = rdma_transport;
            common::rdma_transport_map[common::rdma_transport_map.size()] = rdma_transport;
            break;
        }
    }
    return 0;
}
// Sets a common socket which can then accept
// multiple conns. Useful for servers
int Connection_Pool::make_common_socket(int port) {
this->common_port = port;
if(port < 0) {
// error
return -1;
}
struct sockaddr_in host_addr;
memset(&host_addr, 0, sizeof(host_addr));
host_addr.sin_family = AF_INET;
host_addr.sin_port = htons(this->common_port);
host_addr.sin_addr.s_addr = htonl(INADDR_ANY);
this->common_socket_local_fd = socket(host_addr.sin_family,
SOCK_STREAM,
0);
if(this->common_socket_local_fd < 0) {
//error
return -1;
}
FD_SET(this->common_socket_local_fd, &(this->cset));
return 0;
}
// make a connection obj on the
// common socket formed
// Need to form a common socket first
// int Connection_Pool::make_connection_common_socket(std::string addr) {
// int conn_fd;
// TCP_Transport *tcp_transport = NULL;
// RDMA_Transport *rdma_transport = NULL;
// if(debug) {
// printf("Initiating connection on the common socket to: %s %d\n", addr.c_str(), this->common_port);
// }
// switch(this->transport_type) {
// case TCP_IP_TRANSPORT:
// tcp_transport = new TCP_Transport(addr, this->common_port);
// tcp_transport->set_local_fd(this->common_socket_local_fd);
// tcp_transport->accept_conn();
// conn_fd = tcp_transport->get_conn_fd();
// this->max_fd = std::max(this->max_fd, std::max(this->common_socket_local_fd, conn_fd));
// this->tcp_connection_pool.push_back(tcp_transport);
// FD_SET(conn_fd, &(this->cset));
// this->tcp_transport_map[conn_fd] = tcp_transport;
// break;
// case RDMA_RC_TRANSPORT:
// rdma_transport = new RDMA_Transport(addr, this->common_port);
// }
// }
int Connection_Pool::make_connection(std::string addr, int port) {
int local_fd, conn_fd;
TCP_Transport *tcp_transport = NULL;
RDMA_Transport *rdma_transport = NULL;
if(debug) {
printf("Initiating connection to: %s %d \n", addr.c_str(), port);
}
switch(this->transport_type) {
case TCP_IP_TRANSPORT:
tcp_transport = new TCP_Transport(addr, port);
//this->tcp_connection_pool.push_back(tcp_transport);
tcp_transport->setup();
local_fd = tcp_transport->get_local_fd();
conn_fd = tcp_transport->get_conn_fd();
this->max_fd = std::max(this->max_fd, std::max(local_fd, conn_fd));
this->tcp_connection_pool.push_back(tcp_transport);
FD_SET(conn_fd, &(this->cset));
if(conn_fd != local_fd) {
FD_SET(local_fd, &(this->cset));
}
this->tcp_transport_map[local_fd] = tcp_transport;
this->tcp_transport_map[conn_fd] = tcp_transport;
common::tcp_transport_map[common::tcp_transport_map.size()] = tcp_transport;
break;
case RDMA_RC_TRANSPORT:
rdma_transport = new RDMA_Transport(addr, port);
//this->rdma_connection_pool.push_back(rdma_transport);
rdma_transport->rdma_setup();
local_fd = rdma_transport->get_local_fd();
conn_fd = rdma_transport->get_conn_fd();
this->max_fd = std::max(this->max_fd, std::max(local_fd, conn_fd));
this->rdma_connection_pool.push_back(rdma_transport);
FD_SET(conn_fd, &(this->cset));
if(conn_fd != local_fd) {
FD_SET(local_fd, &(this->cset));
}
this->rdma_transport_map[local_fd] = rdma_transport;
this->rdma_transport_map[conn_fd] = rdma_transport;
common::rdma_transport_map[common::rdma_transport_map.size()] = rdma_transport;
break;
}
return 0;
}
int Connection_Pool::make_connection(std::string addr, int port, Params params) {
int local_fd, conn_fd;
TCP_Transport *tcp_transport = NULL;
RDMA_Transport *rdma_transport = NULL;
if(debug) {
printf("Initiating connection to: %s %d \n", addr.c_str(), port);
}
switch(this->transport_type) {
case TCP_IP_TRANSPORT:
tcp_transport = new TCP_Transport(addr, port);
tcp_transport->setup();
local_fd = tcp_transport->get_local_fd();
conn_fd = tcp_transport->get_conn_fd();
this->max_fd = std::max(this->max_fd, std::max(local_fd, conn_fd));
this->tcp_connection_pool.push_back(tcp_transport);
FD_SET(conn_fd, &(this->cset));
if(conn_fd != local_fd) {
FD_SET(local_fd, &(this->cset));
}
this->tcp_transport_map[local_fd] = tcp_transport;
this->tcp_transport_map[conn_fd] = tcp_transport;
common::tcp_transport_map[common::tcp_transport_map.size()] = tcp_transport;
break;
case RDMA_RC_TRANSPORT:
rdma_transport = new RDMA_Transport(addr, port);
rdma_transport->rdma_config->mr.mr_size = params.rdma_mr_size_bytes;
rdma_transport->rdma_config->mtu = params.rdma_mtu_size_bytes;
rdma_transport->rdma_config->min_rnr_timer = params.rdma_min_rnr_timer;
rdma_transport->rdma_config->timeout = params.rdma_timeout;
rdma_transport->rdma_config->retry_cnt = params.rdma_retry_cnt;
rdma_transport->rdma_config->ib_port = params.rdma_ib_port;
rdma_transport->rdma_config->gid_idx = params.rdma_gid_idx;
rdma_transport->rdma_setup();
local_fd = rdma_transport->get_local_fd();
conn_fd = rdma_transport->get_conn_fd();
this->max_fd = std::max(this->max_fd, std::max(local_fd, conn_fd));
this->rdma_connection_pool.push_back(rdma_transport);
FD_SET(conn_fd, &(this->cset));
if(conn_fd != local_fd) {
FD_SET(local_fd, &(this->cset));
}
this->rdma_transport_map[local_fd] = rdma_transport;
this->rdma_transport_map[conn_fd] = rdma_transport;
common::rdma_transport_map[common::rdma_transport_map.size()] = rdma_transport;
break;
}
return 0;
}
std::vector<TCP_Transport*> Connection_Pool::get_tcp_request_conns() {
std::vector<TCP_Transport*> ret_transports;
std::vector<int> fds;
fd_set tset;
tset = this->cset;
int n;
n = select(this->max_fd+1, &tset, NULL, NULL, NULL);
for(int i=0; i<this->max_fd+1; i++) {
if(FD_ISSET(i, &tset)) {
fds.push_back(i);
}
}
for(int i: fds) {
ret_transports.push_back(this->tcp_transport_map[i]);
}
return ret_transports;
}
std::vector<RDMA_Transport*> Connection_Pool::get_rdma_request_conns() {
std::vector<RDMA_Transport*> ret_transports;
std::vector<int> fds;
fd_set tset;
struct timeval tv; // for select timeout
tv.tv_sec = 0; // To make select return
tv.tv_usec = 0; // immediately after checking once
tset = this->cset;
int n;
n = select(this->max_fd+1, &tset, NULL, NULL, &tv);
// TCP_Transport *temp_tcp;
// char* buf;
for(int i=0; i<this->max_fd+1; i++) {
if(FD_ISSET(i, &tset)) {
fds.push_back(i);
// temp_tcp = this->rdma_transport_map[i]->get_tcp_conn();
// buf = NULL;
// temp_tcp->recv_data(&buf);
// free(buf);
// n = 0;
// ioctl(i, FIONREAD, &n);
// if(n==0) {
// //std::cout<<"Connection to fd "<<i<<" is closed"<<std::endl;
// FD_CLR(i, &this->cset);
// }
}
}
for(int i: fds) {
ret_transports.push_back(this->rdma_transport_map[i]);
}
return ret_transports;
}
#endif
\ No newline at end of file
#ifndef __CONNECTION_POOL_H__
#define __CONNECTION_POOL_H__
#include <vector>
#include <unordered_map>
#include "../config/read_config.hpp"
#include "../transport_api/transport_config.hpp"
extern fd_set cset, rset;
// extern std::vector<TCP_Transport*> tcp_connection_pool;
// extern std::vector<RDMA_Transport*> rdma_connection_pool;
// Keeps track of every TCP / RDMA transport opened or accepted by this
// process, together with the select() bookkeeping (descriptor set and highest
// descriptor) needed to find out which of them have data waiting.
// All members are public; transports are stored as raw pointers and are not
// released by this class (the destructor is commented out).
class Connection_Pool {
public:
// Which kind of transport accept_conn()/make_connection() create.
enum Transport_Type transport_type;
// Shared listening transport created by set_common_tcp_conn(); NULL until then.
TCP_Transport *common_tcp_conn;
// Port of the shared socket; -1 until set.
int common_port;
// Descriptor created by make_common_socket(); -1 until then.
int common_socket_local_fd;
// Descriptors watched by the get_*_request_conns() select() calls.
fd_set cset;
// Every transport created by this pool, in creation order.
std::vector<TCP_Transport*> tcp_connection_pool;
std::vector<RDMA_Transport*> rdma_connection_pool;
// Descriptor -> transport lookup used after select().
std::unordered_map<int, TCP_Transport*> tcp_transport_map;
std::unordered_map<int, RDMA_Transport*> rdma_transport_map;
// Highest descriptor registered so far (select() is called with max_fd+1).
int max_fd;
Connection_Pool();
Connection_Pool(enum Transport_Type t);
// NOTE(review): no definition of this overload is visible next to the other
// constructors — confirm it is defined somewhere before using it.
Connection_Pool(enum Transport_Type t, int port);
//~Connection_Pool();
// Create the shared listening transport on `port`; 0 on success, -1 on error.
int set_common_tcp_conn(int port);
// Put the shared transport into listening mode.
int start_common_conn_listen();
// Accept one connection on the shared transport; 0 on success, -1 on error.
int accept_conn(Params *parameters);
// Create a bare shared socket for `port`; 0 on success, -1 on error.
int make_common_socket(int port);
//int make_connection_common_socket(std::string addr);
// Open an outgoing connection (default / caller-supplied RDMA settings).
int make_connection(std::string addr, int port);
int make_connection(std::string addr, int port, Params params);
// Transports whose descriptors are currently readable.
std::vector<TCP_Transport*> get_tcp_request_conns();
std::vector<RDMA_Transport*> get_rdma_request_conns();
};
#endif
\ No newline at end of file
#ifndef __DISPATCHER_C__
#define __DISPATCHER_C__
#include <algorithm>
#include <chrono>
#include <vector>
#include "../config/read_config.hpp"
#include "common.hpp"
#include "connection_pool.hpp"
#include "client_functions.hpp"
#include "dispatcher.hpp"
#include "thread_functions.hpp"
#include "../transport_api/transport_config.hpp"
namespace chrono = std::chrono;
// Builds a dispatcher for transport kind `t` with a fresh connection pool and
// a pool of `num_threads` workers running the default worker_function.
// The worker threads are started before the constructor returns.
Dispatcher::Dispatcher(enum Transport_Type t, int num_threads) {
    transport_type = t;
    conn_pool = new Connection_Pool(t);

    thread_pool = new Thread_Pool(num_threads);
    Thread_Pool *workers = thread_pool;
    workers->set_function(worker_function);
    workers->start_threads();
}
// Same as the two-argument constructor, but the worker threads run the
// caller-supplied `func` instead of the default worker_function.
Dispatcher::Dispatcher(enum Transport_Type t, int num_threads, void* func(void*)) {
    transport_type = t;
    conn_pool = new Connection_Pool(t);

    thread_pool = new Thread_Pool(num_threads);
    Thread_Pool *workers = thread_pool;
    workers->set_function(func);
    workers->start_threads();
}
// Dispatcher::~Dispatcher() {
// }
// Creates the shared listening transport on `port` in this dispatcher's
// connection pool.
// Returns the pool's result: 0 on success, -1 if the socket could not be
// created. The original discarded that result and fell off the end of this
// int function, which is undefined behaviour.
int Dispatcher::setup_common_tcp_conn(int port) {
    return this->conn_pool->set_common_tcp_conn(port);
}
// Puts the connection pool's shared transport into listening mode and returns
// whatever Connection_Pool::start_common_conn_listen() reports.
// The original discarded that result and fell off the end of this int
// function, which is undefined behaviour.
int Dispatcher::common_socket_start_listen() {
    return this->conn_pool->start_common_conn_listen();
}
// Accepts one incoming connection on the pool's shared TCP transport.
// `parameters` is taken by value; the pool only sees a pointer to this local
// copy for the duration of the call. Returns the pool's result (0 / -1).
int Dispatcher::add_conn_on_common_tcp(Params parameters) {
    Params *local_copy = &parameters;
    const int rc = conn_pool->accept_conn(local_copy);
    return rc;
}
// Makes connection in its Connection_Pool
int Dispatcher::add_connection(std::string addr, int port) {
return this->conn_pool->make_connection(addr, port);
}
int Dispatcher::add_connection(std::string addr, int port, Params params) {
return this->conn_pool->make_connection(addr, port, params);
}
std::vector<job_context*> Dispatcher::get_jobs() {
std::vector<TCP_Transport*> tcp_transports;
std::vector<RDMA_Transport*> rdma_transports;
ssize_t n;
std::vector<job_context*> active_jobs;
char* buf = NULL;
job_context* job;
Common_Request *cr;
char *tbuf = NULL;
TCP_Transport *temp_tcp;
switch(this->transport_type) {
case TCP_IP_TRANSPORT:
tcp_transports = this->conn_pool->get_tcp_request_conns();
for(TCP_Transport* transport: tcp_transports) {
n = transport->recv_data(&buf);
cr = (Common_Request*) buf;
job = new job_context(transport, cr->opcode);
job->service_type = cr->service_type;
job->job_type = cr->type;
job->request_packet = buf;
job->request = (Common_Request*) buf;
job->transport_type = TCP_IP_TRANSPORT;
active_jobs.push_back(job);
}
break;
case RDMA_RC_TRANSPORT:
rdma_transports = this->conn_pool->get_rdma_request_conns();
for(RDMA_Transport* transport: rdma_transports) {
temp_tcp = transport->get_tcp_conn();
if(tbuf!=NULL) {
free(tbuf);
}
tbuf = NULL;
temp_tcp->recv_data(&tbuf);
cr = (Common_Request*) tbuf;
switch(cr->type) {
case TYPE_REQUEST:
switch(transport_type) {
case TCP_IP_TRANSPORT:
//pass for now
break;
case RDMA_RC_TRANSPORT:
if(transport->one_sided_read()) {
//error
perror("Unable to read rdma data");
continue;
}
cr = (Common_Request*) transport->get_mr_addr();
job = new job_context(transport, cr->opcode);
job->service_type = cr->service_type;
job->job_type = cr->type;
job->request_packet = transport->get_mr_addr();
job->request = cr;
job->transport_type = RDMA_RC_TRANSPORT;
active_jobs.push_back(job);
if(tbuf!=NULL) {
free(tbuf);
tbuf = NULL;
}
break;
}
break;
case TYPE_RESPONSE:
switch(transport_type) {
case TCP_IP_TRANSPORT:
//pass for now
break;
case RDMA_RC_TRANSPORT:
cr = (Common_Request*) tbuf;
job = new job_context(transport, cr->opcode);
job->service_type = cr->service_type;
job->job_type = cr->type;
job->request_packet = tbuf;
job->request = cr;
job->transport_type = RDMA_RC_TRANSPORT;
active_jobs.push_back(job);
tbuf = NULL;
break;
}
break;
}
}
break;
}
return active_jobs;
}
// Routes each job to where it will be processed:
//   * requests go onto request_queue (stamped with the post time when
//     `analyze` is set);
//   * responses are inserted into response_buffer, keyed by the connection
//     descriptor of the job's transport. The insert is retried while it
//     reports failure (non-zero) and less than response_buffer_add_timeout
//     has elapsed since this job was picked up.
// Jobs with any other job_type are ignored.
void Dispatcher::assign_jobs(std::vector<job_context*> jobs) {
    for (size_t idx = 0; idx < jobs.size(); ++idx) {
        job_context* current = jobs[idx];
        const chrono::steady_clock::time_point picked_up = chrono::steady_clock::now();

        if (current->job_type == TYPE_REQUEST) {
            if (analyze) {
                current->job_post_time = chrono::steady_clock::now();
            }
            request_queue->enqueue(current);
            continue;
        }
        if (current->job_type != TYPE_RESPONSE) {
            continue;
        }

        int add_result;
        if (transport_type == TCP_IP_TRANSPORT) {
            do {
                add_result = response_buffer->add_element(
                    current->tcp_transport->get_conn_fd(), current, current->request);
            } while (add_result != 0 &&
                     (chrono::steady_clock::now() - picked_up) < response_buffer_add_timeout);
        } else if (transport_type == RDMA_RC_TRANSPORT) {
            do {
                if (analyze) {
                    // Re-stamped on every attempt, as in the TCP request path.
                    current->job_post_time = chrono::steady_clock::now();
                }
                add_result = response_buffer->add_element(
                    current->rdma_transport->get_conn_fd(), current, current->request);
            } while (add_result != 0 &&
                     (chrono::steady_clock::now() - picked_up) < response_buffer_add_timeout);
        }
    }
}
// Drains the jobs that were in send_queue when the call started and sends
// each one over TCP.
//
// The bytes sent are the job's request_packet: the Common_Request header plus,
// for requests, the key/value blob whose size is taken from
// req.w_request.length. For RDMA the transport's TCP side channel is used:
//   - NIC:    only TCP "prepare" requests need to be sent,
//   - Server: only TCP (n)acks,
//   - Client: only TCP requests,
// because only RDMA reads are used and each is preceded by a TCP message.
// Each job is deleted after it has been sent; its packet is not freed here.
//
// Changes from the original:
//   * a NULL job from get_job() (queue drained meanwhile, or a NULL entry) is
//     skipped instead of being dereferenced;
//   * blob_size defaults to the header size, so it is no longer read
//     uninitialised when job_type is neither TYPE_REQUEST nor TYPE_RESPONSE;
//   * a job without a usable transport is dropped instead of dereferencing an
//     uninitialised pointer.
// NOTE(review): the initial q.size() read is not done under the queue's lock.
void Dispatcher::service_send_queue() {
    const size_t send_queue_size = send_queue->q.size();
    for (size_t i = 0; i < send_queue_size; i++) {
        job_context *job = send_queue->get_job();
        if (job == NULL) {
            continue;
        }

        size_t blob_size = sizeof(Common_Request);
        if (job->job_type == TYPE_REQUEST) {
            blob_size += job->request->req.w_request.length;
        }

        TCP_Transport *tcp_transport = NULL;
        switch (transport_type) {
            case TCP_IP_TRANSPORT:
                tcp_transport = job->tcp_transport;
                break;
            case RDMA_RC_TRANSPORT:
                if (job->rdma_transport != NULL) {
                    tcp_transport = job->rdma_transport->get_tcp_conn();
                }
                break;
        }
        if (tcp_transport == NULL) {
            fprintf(stderr, "service_send_queue: job has no transport, dropping\n");
            delete(job);
            continue;
        }

        // Temporarily point the transport's send region at this packet.
        tcp_transport->mr = job->request_packet;
        tcp_transport->mr_size = blob_size;
        tcp_transport->send_data();
        if (analyze) {
            send_queue_wait_time.push_back(chrono::steady_clock::now() - job->job_post_time);
        }
        tcp_transport->mr = NULL;
        tcp_transport->mr_size = 0;
        // free(job->request_packet);
        delete(job);
    }
}
// Executes one tokenised command-line command against the first connection
// in the pool.
//
// Accepted forms (the command word is case-insensitive):
//   get <key>          -> read_rpc
//   put <key> <value>  -> write_rpc
//   exit               -> no RPC
//
// Returns  0 for an empty token list or a successfully issued get/put,
//          1 for "exit",
//         -1 for a malformed/unknown command, a missing connection, or an
//            RPC that reported failure.
//
// Changes from the original:
//   * "exit" on its own is now recognised. It used to be rejected by the
//     "single token is invalid" check before the exit branch could run, so
//     exit only worked with a trailing extra token.
//   * the debug line of the TCP GET path said "Issuing PUT Request (RDMA)".
//   * the connection pool is checked for emptiness before element 0 is used.
int Dispatcher::issue_cmd(std::vector<std::string> tokens) {
    if (tokens.empty()) return 0; // empty command

    std::string cmd = tokens[0];
    std::transform(cmd.begin(), cmd.end(), cmd.begin(),
                   [](unsigned char c) { return std::tolower(c); });

    if (cmd.compare("exit") == 0) {
        return 1;
    }
    if (tokens.size() == 1) {
        // every remaining command needs at least one argument
        return -1;
    }

    if (cmd.compare("get") == 0) {
        if (tokens.size() != 2) {
            // invalid get format
            return -1;
        }
        std::string key = tokens[1];
        if (this->transport_type == RDMA_RC_TRANSPORT) {
            if (this->conn_pool->rdma_connection_pool.empty()) {
                fprintf(stderr, "issue_cmd: no rdma connection available\n");
                return -1;
            }
            if (debug) {
                printf("Issuing GET Request (RDMA)\n");
            }
            if (read_rpc((this->conn_pool->rdma_connection_pool[0]),
                         key.c_str(),
                         key.size()))
            {
                //error
                perror("issue_cmd: read_rpc rdma fail");
                return -1;
            }
        }
        else if (this->transport_type == TCP_IP_TRANSPORT) {
            if (this->conn_pool->tcp_connection_pool.empty()) {
                fprintf(stderr, "issue_cmd: no tcp connection available\n");
                return -1;
            }
            if (debug) {
                printf("Issuing GET Request (TCP)\n");
            }
            if (read_rpc((this->conn_pool->tcp_connection_pool[0]),
                         key,
                         key.size()))
            {
                perror("issue_cmd: read_rpc tcp fail");
                return -1;
            }
        }
    }
    else if (cmd.compare("put") == 0) {
        // only accept "cmd key value" format for put command
        if (tokens.size() != 3) {
            // invalid put format
            return -1;
        }
        std::string key = tokens[1];
        std::string value = tokens[2];
        if (this->transport_type == RDMA_RC_TRANSPORT) {
            if (this->conn_pool->rdma_connection_pool.empty()) {
                fprintf(stderr, "issue_cmd: no rdma connection available\n");
                return -1;
            }
            if (debug) {
                printf("Issuing PUT request (RDMA)\n");
            }
            if (write_rpc(this->conn_pool->rdma_connection_pool[0],
                          key.c_str(),
                          key.size(),
                          value.c_str(),
                          value.size()))
            {
                //error
                return -1;
            }
        }
        else if (this->transport_type == TCP_IP_TRANSPORT) {
            if (this->conn_pool->tcp_connection_pool.empty()) {
                fprintf(stderr, "issue_cmd: no tcp connection available\n");
                return -1;
            }
            if (write_rpc(this->conn_pool->tcp_connection_pool[0],
                          key.c_str(),
                          key.size(),
                          value.c_str(),
                          value.size()))
            {
                //error
                return -1;
            }
        }
    }
    else {
        //error: unknown command
        return -1;
    }
    return 0;
}
#endif
\ No newline at end of file
#ifndef __DISPATCHER_H__
#define __DISPATCHER_H__
#include <string>
#include <vector>
#include "../config/read_config.hpp"
#include "connection_pool.hpp"
#include "thread_pool.hpp"
#include "../transport_api/transport_config.hpp"
// Front end that ties a Connection_Pool to a Thread_Pool: it discovers
// incoming messages, turns them into job_context objects, hands them to the
// worker queues and flushes outgoing messages.
// All members are public; conn_pool and thread_pool are allocated in the
// constructors and are not released by this class (the destructor is
// commented out).
class Dispatcher {
public:
// Transport kind used for every connection of this dispatcher.
enum Transport_Type transport_type;
Connection_Pool *conn_pool;
Thread_Pool *thread_pool;
// NOTE(review): declared here, but no definition is visible alongside the
// other member functions — confirm it exists before calling it.
int get_num_active_requests();
// Start `num_threads` workers running the default worker_function.
Dispatcher(enum Transport_Type t, int num_threads);
// Start `num_threads` workers running `func`.
Dispatcher(enum Transport_Type t, int num_threads, void* func(void*));
//~Dispatcher();
// Create / start listening on the pool's shared TCP transport.
int setup_common_tcp_conn(int port);
int common_socket_start_listen();
// Accept one incoming connection on the shared TCP transport.
int add_conn_on_common_tcp(Params parameters);
// Open an outgoing connection (default / caller-supplied RDMA settings).
int add_connection(std::string addr, int port);
int add_connection(std::string addr, int port, Params params);
// Build one job per connection that has a message waiting.
std::vector<job_context*> get_jobs();
// Route jobs to the request queue or the response buffer.
void assign_jobs(std::vector<job_context*> jobs);
// Run a tokenised get/put/exit command; 0 ok, 1 exit, -1 error.
int issue_cmd(std::vector<std::string> tokens);
// Send every job currently waiting in the send queue.
void service_send_queue();
};
#endif
\ No newline at end of file
#ifndef __GENERAL_THREADSAFE_QUEUE_H__
#define __GENERAL_THREADSAFE_QUEUE_H__
#include <queue>
#include <pthread.h>
#include "queue_context.hpp"
/**
 * FIFO queue whose individual operations are serialised by a pthread mutex.
 *
 * T is expected to be a pointer type: get_job()/front() return a null value
 * when the queue is empty, and the destructor deletes every element that is
 * still queued.
 *
 * Members are public for compatibility with existing callers; code that
 * touches `q` directly is not protected by `queue_lock`.
 */
template <typename T>
class ThreadSafe_Queue {
public:
    std::queue<T> q;
    pthread_mutex_t queue_lock;
    size_t queue_size;  // not maintained by any method of this class

    ThreadSafe_Queue();
    ThreadSafe_Queue(size_t n, T jobs[]);
    ~ThreadSafe_Queue();

    void enqueue(T job);
    void dequeue();
    T front();
    T get_job();
};

/// Creates an empty queue. Terminates the process if the mutex cannot be
/// initialised.
template <typename T>
ThreadSafe_Queue<T>::ThreadSafe_Queue() {
    this->queue_size = 0;
    if (pthread_mutex_init(&(this->queue_lock), NULL)) {
        perror("Mutex creation error");
        exit(-1);
    }
}

/// Creates a queue pre-filled with jobs[0..n-1], in that order.
template <typename T>
ThreadSafe_Queue<T>::ThreadSafe_Queue(size_t n, T jobs[]) : ThreadSafe_Queue() {
    for (size_t i = 0; i < n; i++) {
        this->q.push(jobs[i]);
    }
}

/// Deletes every element still queued, then destroys the mutex.
/// The caller must make sure no other thread is using the queue any more.
///
/// Fixes relative to the original destructor:
///  - the drained element was declared as `T*` but assigned from `T`, which
///    does not compile once the destructor is instantiated;
///  - the mutex was unlocked without being held by this thread;
///  - the mutex was destroyed before the queue was drained.
template <typename T>
ThreadSafe_Queue<T>::~ThreadSafe_Queue() {
    while (!(this->q.empty())) {
        T leftover = this->q.front();
        this->q.pop();
        delete leftover;
    }
    pthread_mutex_destroy(&(this->queue_lock));
}

/// Appends `job` to the back of the queue.
template <typename T>
void ThreadSafe_Queue<T>::enqueue(T job) {
    pthread_mutex_lock(&(this->queue_lock));
    this->q.push(job);
    pthread_mutex_unlock(&(this->queue_lock));
}

/// Removes the front element without returning or deleting it.
/// Does nothing on an empty queue (popping an empty std::queue is undefined).
template <typename T>
void ThreadSafe_Queue<T>::dequeue() {
    pthread_mutex_lock(&(this->queue_lock));
    if (!(this->q.empty())) {
        this->q.pop();
    }
    pthread_mutex_unlock(&(this->queue_lock));
}

/// Returns the front element without removing it, or a null value if the
/// queue is empty (reading front() of an empty std::queue is undefined).
template <typename T>
T ThreadSafe_Queue<T>::front() {
    T t = T();
    pthread_mutex_lock(&(this->queue_lock));
    if (!(this->q.empty())) {
        t = this->q.front();
    }
    pthread_mutex_unlock(&(this->queue_lock));
    return t;
}

/// Removes and returns the front element, or returns a null value if the
/// queue is empty. A null element stored in the queue is removed and
/// returned like any other, so callers cannot tell it apart from "empty".
template <typename T>
T ThreadSafe_Queue<T>::get_job() {
    T t = T();
    pthread_mutex_lock(&(this->queue_lock));
    if (!(this->q.empty())) {
        t = this->q.front();
        this->q.pop();
    }
    pthread_mutex_unlock(&(this->queue_lock));
    return t;
}
#endif
\ No newline at end of file
#ifndef __HASH_C__
#define __HASH_C__
#include <stdint.h>
#include "hash.hpp"
// Maps `key` to a bucket index in [0, n) using key % n.
//
// key: value to hash.
// n:   number of buckets; expected to be positive.
// Returns key % n, or 0 when n is not positive. The original divided by zero
// for n == 0 and, for negative n, used the value converted to an enormous
// unsigned modulus, producing an index outside any real bucket range.
uint32_t hash(uint64_t key, int n) {
    if (n <= 0) {
        return 0;
    }
    const uint64_t buckets = static_cast<uint64_t>(n);
    return static_cast<uint32_t>(key % buckets);
}
#endif
\ No newline at end of file
#ifndef __HASH_H__
#define __HASH_H__
#include <stdint.h>
uint32_t hash(uint64_t key, int n);
#endif
\ No newline at end of file
#ifndef __MONITOR_CC__
#define __MONITOR_CC__
#include <chrono>
#include <iostream>
#include <pthread.h>
#include <stdio.h>
#include <vector>
#include "monitor.hpp"
namespace chrono = std::chrono;
// Zeroes every counter, latency accumulator and throughput field, then
// initialises one mutex per counter/accumulator.
//
// The chrono::duration accumulators are set to zero explicitly: their default
// constructor leaves the tick count uninitialised, and the incr_sum_* methods
// add onto whatever value is there. A failing pthread_mutex_init is reported
// on stderr instead of being ignored.
Monitor::Monitor() {
    this->num_requests = 0;
    this->num_read_requests = 0;
    this->num_write_requests = 0;
    this->num_succ_requests = 0;
    this->num_err_requests = 0;
    this->num_dropped_requests = 0;
    this->num_read_succ_requests = 0;
    this->num_read_err_requests = 0;
    this->num_write_succ_requests = 0;
    this->num_write_err_requests = 0;

    const chrono::duration<double, std::milli> no_time = chrono::duration<double, std::milli>::zero();
    this->sum_read_latency = no_time;
    this->sum_write_latency = no_time;
    this->sum_replicate_latency = no_time;
    this->sum_data_transfer_latency = no_time;
    this->total_time_taken = no_time;

    this->request_throughput = 0.0;
    this->read_throughput = 0.0;
    this->write_throughput = 0.0;

    pthread_mutex_t* const all_locks[] = {
        &(this->num_requests_lock),
        &(this->num_read_lock),
        &(this->num_write_lock),
        &(this->num_succ_lock),
        &(this->num_err_lock),
        &(this->num_dropped_lock),
        &(this->num_read_succ_lock),
        &(this->num_read_err_lock),
        &(this->num_write_succ_lock),
        &(this->num_write_err_lock),
        &(this->sum_read_lat_lock),
        &(this->sum_write_lat_lock),
        &(this->sum_replicate_lat_lock),
        &(this->sum_data_transfer_lat_lock),
        &(this->sum_total_time_lock)
    };
    for(pthread_mutex_t* lock : all_locks) {
        const int rc = pthread_mutex_init(lock, NULL);
        if(rc != 0) {
            fprintf(stderr, "Monitor: pthread_mutex_init failed (error %d)\n", rc);
        }
    }
}
// Returns num_requests. The value is read under num_requests_lock, the mutex
// incr_num_requests() holds while updating it, so a concurrent increment
// cannot race with the read.
long long Monitor::get_num_requests() {
    pthread_mutex_lock(&(this->num_requests_lock));
    const long long snapshot = this->num_requests;
    pthread_mutex_unlock(&(this->num_requests_lock));
    return snapshot;
}
// Returns num_read_requests, read under num_read_lock (the mutex
// incr_num_read_requests() holds while updating it).
long long Monitor::get_num_read_requests() {
    pthread_mutex_lock(&(this->num_read_lock));
    const long long snapshot = this->num_read_requests;
    pthread_mutex_unlock(&(this->num_read_lock));
    return snapshot;
}
// Returns num_write_requests, read under num_write_lock (the mutex
// incr_num_write_requests() holds while updating it).
long long Monitor::get_num_write_requests() {
    pthread_mutex_lock(&(this->num_write_lock));
    const long long snapshot = this->num_write_requests;
    pthread_mutex_unlock(&(this->num_write_lock));
    return snapshot;
}
// Returns num_succ_requests, read under num_succ_lock (the mutex
// incr_num_succ_requests() holds while updating it).
long long Monitor::get_num_succ_requests() {
    pthread_mutex_lock(&(this->num_succ_lock));
    const long long snapshot = this->num_succ_requests;
    pthread_mutex_unlock(&(this->num_succ_lock));
    return snapshot;
}
// Returns num_err_requests, read under num_err_lock (the mutex
// incr_num_err_requests() holds while updating it).
long long Monitor::get_num_err_requests() {
    pthread_mutex_lock(&(this->num_err_lock));
    const long long snapshot = this->num_err_requests;
    pthread_mutex_unlock(&(this->num_err_lock));
    return snapshot;
}
// Returns num_dropped_requests, read under num_dropped_lock (the mutex
// incr_num_dropped_requests() holds while updating it).
long long Monitor::get_num_dropped_requests() {
    pthread_mutex_lock(&(this->num_dropped_lock));
    const long long snapshot = this->num_dropped_requests;
    pthread_mutex_unlock(&(this->num_dropped_lock));
    return snapshot;
}
// Returns num_read_succ_requests, read under num_read_succ_lock (the mutex
// incr_num_read_succ_requests() holds while updating it).
long long Monitor::get_num_read_succ_requests() {
    pthread_mutex_lock(&(this->num_read_succ_lock));
    const long long snapshot = this->num_read_succ_requests;
    pthread_mutex_unlock(&(this->num_read_succ_lock));
    return snapshot;
}
// Returns num_read_err_requests, read under num_read_err_lock (the mutex
// incr_num_read_err_requests() holds while updating it).
long long Monitor::get_num_read_err_requests() {
    pthread_mutex_lock(&(this->num_read_err_lock));
    const long long snapshot = this->num_read_err_requests;
    pthread_mutex_unlock(&(this->num_read_err_lock));
    return snapshot;
}
// Returns num_write_succ_requests, read under num_write_succ_lock (the mutex
// incr_num_write_succ_requests() holds while updating it).
long long Monitor::get_num_write_succ_requests() {
    pthread_mutex_lock(&(this->num_write_succ_lock));
    const long long snapshot = this->num_write_succ_requests;
    pthread_mutex_unlock(&(this->num_write_succ_lock));
    return snapshot;
}
// Returns num_write_err_requests, read under num_write_err_lock (the mutex
// incr_num_write_err_requests() holds while updating it).
long long Monitor::get_num_write_err_requests() {
    pthread_mutex_lock(&(this->num_write_err_lock));
    const long long snapshot = this->num_write_err_requests;
    pthread_mutex_unlock(&(this->num_write_err_lock));
    return snapshot;
}
// Adds one to num_requests while holding num_requests_lock.
// NOTE(review): the argument `i` is not used; each call counts exactly one
// request -- confirm that is the intended contract.
void Monitor::incr_num_requests(int i) {
    pthread_mutex_t *guard = &(this->num_requests_lock);
    pthread_mutex_lock(guard);
    this->num_requests += 1;
    pthread_mutex_unlock(guard);
}
// Adds one to num_read_requests while holding num_read_lock.
// NOTE(review): the argument `i` is not used -- confirm that is intended.
void Monitor::incr_num_read_requests(int i) {
    pthread_mutex_t *guard = &(this->num_read_lock);
    pthread_mutex_lock(guard);
    this->num_read_requests += 1;
    pthread_mutex_unlock(guard);
}
// Adds one to num_write_requests while holding num_write_lock.
// NOTE(review): the argument `i` is not used -- confirm that is intended.
void Monitor::incr_num_write_requests(int i) {
    pthread_mutex_t *guard = &(this->num_write_lock);
    pthread_mutex_lock(guard);
    this->num_write_requests += 1;
    pthread_mutex_unlock(guard);
}
// Adds one to num_succ_requests while holding num_succ_lock.
// NOTE(review): the argument `i` is not used -- confirm that is intended.
void Monitor::incr_num_succ_requests(int i) {
    pthread_mutex_t *guard = &(this->num_succ_lock);
    pthread_mutex_lock(guard);
    this->num_succ_requests += 1;
    pthread_mutex_unlock(guard);
}
// Adds one to num_err_requests while holding num_err_lock.
// NOTE(review): the argument `i` is not used -- confirm that is intended.
void Monitor::incr_num_err_requests(int i) {
    pthread_mutex_t *guard = &(this->num_err_lock);
    pthread_mutex_lock(guard);
    this->num_err_requests += 1;
    pthread_mutex_unlock(guard);
}
// Adds one to num_dropped_requests while holding num_dropped_lock.
// NOTE(review): the argument `i` is not used -- confirm that is intended.
void Monitor::incr_num_dropped_requests(int i) {
    pthread_mutex_t *guard = &(this->num_dropped_lock);
    pthread_mutex_lock(guard);
    this->num_dropped_requests += 1;
    pthread_mutex_unlock(guard);
}
// Adds one to num_read_succ_requests while holding num_read_succ_lock.
// NOTE(review): the argument `i` is not used -- confirm that is intended.
void Monitor::incr_num_read_succ_requests(int i) {
    pthread_mutex_t *guard = &(this->num_read_succ_lock);
    pthread_mutex_lock(guard);
    this->num_read_succ_requests += 1;
    pthread_mutex_unlock(guard);
}
// Adds one to num_read_err_requests while holding num_read_err_lock.
// NOTE(review): the argument `i` is not used -- confirm that is intended.
void Monitor::incr_num_read_err_requests(int i) {
    pthread_mutex_t *guard = &(this->num_read_err_lock);
    pthread_mutex_lock(guard);
    this->num_read_err_requests += 1;
    pthread_mutex_unlock(guard);
}
// Adds one to num_write_succ_requests while holding num_write_succ_lock.
// NOTE(review): the argument `i` is not used -- confirm that is intended.
void Monitor::incr_num_write_succ_requests(int i) {
    pthread_mutex_t *guard = &(this->num_write_succ_lock);
    pthread_mutex_lock(guard);
    this->num_write_succ_requests += 1;
    pthread_mutex_unlock(guard);
}
// Adds one to num_write_err_requests while holding num_write_err_lock.
// NOTE(review): the argument `i` is not used -- confirm that is intended.
void Monitor::incr_num_write_err_requests(int i) {
    pthread_mutex_t *guard = &(this->num_write_err_lock);
    pthread_mutex_lock(guard);
    this->num_write_err_requests += 1;
    pthread_mutex_unlock(guard);
}
// Adds `lat` (milliseconds) to sum_read_latency while holding
// sum_read_lat_lock.
void Monitor::incr_sum_read_latency(chrono::duration<double, std::milli> lat) {
    pthread_mutex_t *guard = &(this->sum_read_lat_lock);
    pthread_mutex_lock(guard);
    this->sum_read_latency = this->sum_read_latency + lat;
    pthread_mutex_unlock(guard);
}
// Adds `lat` (milliseconds) to sum_write_latency while holding
// sum_write_lat_lock.
void Monitor::incr_sum_write_latency(chrono::duration<double, std::milli> lat) {
    pthread_mutex_t *guard = &(this->sum_write_lat_lock);
    pthread_mutex_lock(guard);
    this->sum_write_latency = this->sum_write_latency + lat;
    pthread_mutex_unlock(guard);
}
// Adds `lat` (milliseconds) to sum_replicate_latency while holding
// sum_replicate_lat_lock.
void Monitor::incr_sum_replicate_latency(chrono::duration<double, std::milli> lat) {
    pthread_mutex_t *guard = &(this->sum_replicate_lat_lock);
    pthread_mutex_lock(guard);
    this->sum_replicate_latency = this->sum_replicate_latency + lat;
    pthread_mutex_unlock(guard);
}
// Adds `lat` (milliseconds) to sum_data_transfer_latency while holding
// sum_data_transfer_lat_lock.
void Monitor::incr_sum_data_transfer_latency(chrono::duration<double, std::milli> lat) {
    pthread_mutex_t *guard = &(this->sum_data_transfer_lat_lock);
    pthread_mutex_lock(guard);
    this->sum_data_transfer_latency = this->sum_data_transfer_latency + lat;
    pthread_mutex_unlock(guard);
}
// Adds `lat` (milliseconds) to total_time_taken while holding
// sum_total_time_lock.
void Monitor::incr_sum_total_time_taken(chrono::duration<double, std::milli> lat) {
    pthread_mutex_t *guard = &(this->sum_total_time_lock);
    pthread_mutex_lock(guard);
    this->total_time_taken = this->total_time_taken + lat;
    pthread_mutex_unlock(guard);
}
// Returns total_time_taken (ms) divided by num_requests, or 0.0 when no
// request has been counted.
// NOTE(review): this is time per request, not requests per unit time, despite
// the name -- confirm what callers expect.
double Monitor::get_request_throughput() {
    if(this->num_requests != 0) {
        return static_cast<double>(this->total_time_taken.count() / this->num_requests);
    }
    return 0.0;
}
// Returns sum_read_latency (ms) divided by num_read_requests, or 0.0 when no
// read has been counted.
// NOTE(review): this is time per read, not reads per unit time, despite the
// name -- confirm what callers expect.
double Monitor::get_read_throughput() {
    if(this->num_read_requests != 0) {
        return static_cast<double>(this->sum_read_latency.count() / this->num_read_requests);
    }
    return 0.0;
}
// Returns sum_write_latency (ms) divided by num_write_requests, or 0.0 when
// no write has been counted.
// NOTE(review): this is time per write, not writes per unit time, despite the
// name -- confirm what callers expect.
double Monitor::get_write_throughput() {
    if(this->num_write_requests != 0) {
        return static_cast<double>(this->sum_write_latency.count() / this->num_write_requests);
    }
    return 0.0;
}
// Collects the three throughput figures into one vector.
// Element order: read, write, total (all requests).
std::vector<double> Monitor::get_all_throughput() {
    const double read_figure = this->get_read_throughput();
    const double write_figure = this->get_write_throughput();
    const double total_figure = this->get_request_throughput();
    return std::vector<double>{read_figure, write_figure, total_figure};
}
#endif
\ No newline at end of file
#ifndef __MONITOR_H__
#define __MONITOR_H__
#include <chrono>
#include <pthread.h>
#include <vector>
namespace chrono = std::chrono;
// Request statistics collector: integer counters per request category,
// millisecond latency accumulators, and one pthread mutex per counter or
// accumulator that the incr_* methods take while updating it.
class Monitor {
private:
// One mutex per counter below; each incr_num_* method locks its own mutex.
pthread_mutex_t num_requests_lock;
pthread_mutex_t num_read_lock;
pthread_mutex_t num_write_lock;
pthread_mutex_t num_succ_lock;
pthread_mutex_t num_err_lock;
pthread_mutex_t num_dropped_lock;
pthread_mutex_t num_read_succ_lock;
pthread_mutex_t num_read_err_lock;
pthread_mutex_t num_write_succ_lock;
pthread_mutex_t num_write_err_lock;
// One mutex per latency accumulator; each incr_sum_* method locks its own.
pthread_mutex_t sum_read_lat_lock;
pthread_mutex_t sum_write_lat_lock;
pthread_mutex_t sum_replicate_lat_lock;
pthread_mutex_t sum_data_transfer_lat_lock;
pthread_mutex_t sum_total_time_lock;
protected:
// Request counters, all set to 0 by the constructor.
long long num_requests;
long long num_read_requests;
long long num_write_requests;
long long num_succ_requests;
long long num_err_requests;
long long num_dropped_requests;
long long num_read_succ_requests;
long long num_read_err_requests;
long long num_write_succ_requests;
long long num_write_err_requests;
// Running sums of measured durations, in milliseconds (double ticks).
chrono::duration<double, std::milli> sum_read_latency;
chrono::duration<double, std::milli> sum_write_latency;
chrono::duration<double, std::milli> sum_replicate_latency;
chrono::duration<double, std::milli> sum_data_transfer_latency;
chrono::duration<double, std::milli> total_time_taken;
// Set to 0.0 by the constructor.
// NOTE(review): no method shown in monitor.cc writes these afterwards.
double request_throughput;
double read_throughput;
double write_throughput;
public:
Monitor();
// Counter accessors: each returns the current value of the matching counter.
long long get_num_requests();
long long get_num_read_requests();
long long get_num_write_requests();
long long get_num_succ_requests();
long long get_num_err_requests();
long long get_num_dropped_requests();
long long get_num_read_succ_requests();
long long get_num_read_err_requests();
long long get_num_write_succ_requests();
long long get_num_write_err_requests();
// Counter increments: each adds one to the matching counter under its mutex.
// NOTE(review): the definitions in monitor.cc do not use the `i` argument.
void incr_num_requests(int i);
void incr_num_read_requests(int i);
void incr_num_write_requests(int i);
void incr_num_succ_requests(int i);
void incr_num_err_requests(int i);
void incr_num_dropped_requests(int i);
void incr_num_read_succ_requests(int i);
void incr_num_read_err_requests(int i);
void incr_num_write_succ_requests(int i);
void incr_num_write_err_requests(int i);
// Latency accumulation: each adds `lat` to the matching sum under its mutex.
void incr_sum_read_latency(chrono::duration<double, std::milli> lat);
void incr_sum_write_latency(chrono::duration<double, std::milli> lat);
void incr_sum_replicate_latency(chrono::duration<double, std::milli> lat);
void incr_sum_data_transfer_latency(chrono::duration<double, std::milli> lat);
void incr_sum_total_time_taken(chrono::duration<double, std::milli> lat);
// Each returns accumulated milliseconds divided by the matching request
// count (0.0 when the count is zero).
double get_request_throughput();
double get_read_throughput();
double get_write_throughput();
// Returns {read, write, total} in that order.
std::vector<double> get_all_throughput();
};
#endif
\ No newline at end of file
#ifndef __QUEUE_CONTEXT_H__
#define __QUEUE_CONTEXT_H__
#include <chrono>
#include "common.hpp"
#include "../transport_api/transport_config.hpp"
namespace chrono = std::chrono;
// Operation codes for queued jobs, grouped by transport (TCP / RDMA RC) and
// by which side handles the request (SERVER / NIC). Enumerator values are
// implicit, so they follow declaration order starting at 0.
// NOTE(review): job_context below stores its opcode as a plain uint8_t and the
// worker code visible in this file assigns READ/WRITE/STATUS_* values to it --
// confirm where these enumerators are actually used.
enum job_opcode {
// Plain TCP transfers.
TCP_READ,
TCP_WRITE,
// RDMA reliable-connection one-sided and two-sided operations.
RDMA_RC_READ,
RDMA_RC_WRITE,
RDMA_RC_TWO_SIDED_SEND,
RDMA_RC_TWO_SIDED_RECV,
// Request kinds over TCP, split by handler (server vs NIC).
TCP_SERVER_READ_REQUEST,
TCP_SERVER_REPLICATION_REQUEST,
TCP_NIC_READ_REQUEST,
TCP_NIC_WRITE_REQUEST,
// Request kinds over RDMA RC, split by handler (server vs NIC).
RDMA_RC_SERVER_READ_REQUEST,
RDMA_RC_SERVER_REPLICATION_REQUEST,
RDMA_RC_NIC_READ_REQUEST,
RDMA_RC_NIC_WRITE_REQUEST
//need to write for RDMA_UC
};
// One unit of work passed between the dispatcher, worker and sender queues.
//
// A job refers to the transport it belongs to (TCP or RDMA, the other pointer
// stays null), carries a few one-byte tags and points at the request header
// and packet. The struct does not free anything it points to.
//
// Every member has a default initialiser so that a constructor which does not
// mention a field leaves it zero/null rather than indeterminate: worker code
// reads transport_type, service_type and request_packet from jobs that were
// built with the two- and three-argument constructors, which never set them.
struct job_context {
    TCP_Transport *tcp_transport = nullptr;
    RDMA_Transport *rdma_transport = nullptr;
    //enum job_opcode opcode;
    uint8_t transport_type = 0;
    uint8_t opcode = 0;
    uint8_t service_type = 0;
    uint8_t job_type = 0;
    char* request_packet = nullptr;
    struct Common_Request* request = nullptr;
    // Default-constructed (clock epoch) unless a constructor stamps it.
    chrono::time_point<chrono::steady_clock> job_post_time;

    // Empty job: every field zero/null, post time left at the clock epoch.
    job_context() {
    }

    // TCP job with the given opcode; stamps the post time with "now".
    job_context(TCP_Transport *transport, uint8_t opcode) {
        this->tcp_transport = transport;
        this->opcode = opcode;
        this->job_post_time = chrono::steady_clock::now();
    }

    // TCP job with opcode and job type; stamps the post time with "now".
    job_context(TCP_Transport *transport, uint8_t opcode, uint8_t type) {
        this->tcp_transport = transport;
        this->opcode = opcode;
        this->job_type = type;
        this->job_post_time = chrono::steady_clock::now();
    }

    // RDMA job with the given opcode; stamps the post time with "now".
    job_context(RDMA_Transport *transport, uint8_t opcode) {
        this->rdma_transport = transport;
        this->opcode = opcode;
        this->job_post_time = chrono::steady_clock::now();
    }

    // RDMA job with opcode and job type; stamps the post time with "now".
    job_context(RDMA_Transport *transport, uint8_t opcode, uint8_t type) {
        this->rdma_transport = transport;
        this->opcode = opcode;
        this->job_type = type;
        this->job_post_time = chrono::steady_clock::now();
    }

    // Only clears the pointers; the objects they referred to are not released.
    ~job_context() {
        this->tcp_transport = nullptr;
        this->rdma_transport = nullptr;
        this->request_packet = nullptr;
        this->request = nullptr;
    }
};
#endif
\ No newline at end of file
#ifndef __SERVER_FUNCTION_CC__
#define __SERVER_FUNCTION_CC__
#include <algorithm>
#include <chrono>
#include <set>
#include <vector>
#include <string>
#include "common.hpp"
#include "hash.hpp"
#include "thread_functions.hpp"
#include "thread_pool.hpp"
#include "threadsafe_queue.hpp"
#include "../transport_api/transport_config.hpp"
#include "cli_api.hpp"
#include "client_functions.hpp"
namespace chrono = std::chrono;
void* thread_function(void*) {
job_context *job = NULL;
char* packet;
char* t;
Common_Request *cr;
uint8_t transport_type;
uint8_t service_type;
while(true) { //change true loop to some variable dependent loop
while(job==NULL) {
job = job_queue->get_job();
}
if(job==NULL) continue;
packet = job->request_packet;
transport_type = job->transport_type;
service_type = job->service_type;
cr = (Common_Request*) packet;
switch(transport_type) {
case TCP_IP_TRANSPORT:
if(cr->opcode == READ) {
if(service_type == MASTER_SERVICE) {
if(tcp_nic_read(job)) {
//err
printf("Unsuccessful completion: tcp_nic_read\n");
}
else {
printf("Successful completion: tcp_nic_read\n");
}
}
else {
if(tcp_server_read(job)) {
printf("Unsuccessful completion: tcp_server_read\n");
}
else {
printf("Successful completion: tcp_server_read\n");
}
}
}
else {
if(service_type == MASTER_SERVICE) {
if(tcp_nic_write(job)) {
printf("Unsuccessful completion: tcp_nic_write\n");
}
else {
printf("Successful completion: tcp_nic_write\n");
}
}
else {
if(tcp_server_replicate(job)) {
printf("Unsuccessful completion: tcp_server_replicate\n");
}
else {
printf("Successful completion: tcp_server_replicate\n");
}
}
}
break;
case RDMA_RC_TRANSPORT:
if(cr->opcode == READ) {
if(service_type == MASTER_SERVICE) {
if(rdma_rc_nic_read(job)) {
printf("Unsuccessful completion: rdma_rc_nic_read\n");
}
else {
printf("Successful completion: rdma_rc_nic_read\n");
}
}
else {
if(rdma_rc_server_read(job)) {
printf("Unsuccessful completion: rdma_rc_server_read\n");
}
else {
printf("Successful completion: rdma_rc_server_read\n");
}
}
}
else {
if(service_type == MASTER_SERVICE) {
if(rdma_rc_nic_write(job)) {
printf("Unsuccessful completion: rdma_rc_nic_write\n");
}
else {
printf("Successful completion: rdma_rc_nic_write\n");
}
}
else {
if(rdma_rc_server_replicate(job)) {
printf("Unsuccessful completion: rdma_rc_server_replicate\n");
}
else {
printf("Successful completion: rdma_rc_server_replicate\n");
}
}
}
break;
}
delete(job);
job = NULL;
}
return NULL;
}
// Worker loop over request_queue. Spins until a job is available, optionally
// records how long the job waited in the queue (when `analyze` is set), and
// dispatches RDMA RC requests:
//   READ  -> rdma_rc_nic_read / rdma_rc_server_read, chosen by self_id
//   WRITE -> MASTER_SERVICE: rdma_rc_nic_write / rdma_rc_server_write by self_id
//            BACKUP_SERVICE: rdma_rc_server_replicate
// TCP jobs are accepted but not processed yet. Each job is deleted afterwards.
//
// The opcode switch used to carry a stray `case MASTER_SERVICE:` label next
// to `case READ:`. MASTER_SERVICE is a service type, not an opcode, so a
// request whose opcode merely had that numeric value was served as a read;
// the label has been removed.
// NOTE(review): request_queue_wait_time and req_cnt are updated here without
// any locking visible in this function -- confirm they are safe to share
// between worker threads.
void* worker_function(void*) {
    job_context *job = NULL;
    Common_Request *cr;
    while(true) {
        // Busy-wait for the next job.
        while(job==NULL) {
            job = request_queue->get_job();
        }
        if(analyze) {
            request_queue_wait_time.push_back(chrono::steady_clock::now() - job->job_post_time);
        }
        cr = job->request;
        // job type is implicit::TYPE_REQUEST since its from request queue
        switch(job->transport_type) {
        case TCP_IP_TRANSPORT:
            //pass for now
            break;
        case RDMA_RC_TRANSPORT:
            switch(cr->opcode) {
            case READ:
                switch(self_id) {
                case NIC:
                    if(rdma_rc_nic_read(job)) {
                        if(debug) {
                            printf("Unsuccessful completion: rdma_rc_nic_read\n");
                        }
                    }
                    else {
                        if(debug) {
                            printf("Successful completion: rdma_rc_nic_read\n");
                        }
                    }
                    break;
                case SERVER:
                    if(rdma_rc_server_read(job)) {
                        if(debug) {
                            printf("Unsuccessful completion: rdma_rc_server_read\n");
                        }
                    }
                    else {
                        if(debug) {
                            printf("Successful completion: rdma_rc_server_read\n");
                        }
                    }
                    break;
                }
                break;
            case WRITE:
                switch(cr->service_type) {
                case MASTER_SERVICE:
                    switch(self_id) {
                    case NIC:
                        if(rdma_rc_nic_write(job)) {
                            if(debug) printf("Unsuccessful completion: rdma_rc_nic_write\n");
                        }
                        else {
                            if(debug) printf("Successful completion: rdma_rc_nic_write\n");
                        }
                        req_cnt++;
                        break;
                    case SERVER:
                        if(rdma_rc_server_write(job)) {
                            if(debug) printf("Unsuccessful completion: rdma_rc_server_write\n");
                        }
                        else {
                            if(debug) printf("Successful completion: rdma_rc_server_write\n");
                        }
                        req_cnt++;
                        break;
                    }
                    break;
                case BACKUP_SERVICE:
                    if(rdma_rc_server_replicate(job)) {
                        if(debug) printf("Unsuccessful completion: rdma_rc_service_replicate\n");
                    }
                    else {
                        if(debug) printf("Successful completion: rdma_rc_service_replicate\n");
                    }
                    req_cnt++;
                    break;
                }
                break;
            }
            break;
        }
        delete(job);
        job = NULL;
    }
    return NULL;
}
void* client_worker_function(void* transport_ptr) { // job will not contain transport
RDMA_Transport *transport = (RDMA_Transport*) transport_ptr;
job_context *job = NULL;
char* packet;
char* t;
Common_Request *cr;
uint8_t job_transport_type;
uint8_t opcode;
char* blob;
size_t key_size;
std::string key;
std::string value;
while(true) {
while(job==NULL) {
job = request_queue->get_job();
}
packet = job->request_packet;
cr = job->request;
job_transport_type = job->transport_type;
opcode = cr->opcode;
switch(job_transport_type) {
case TCP_IP_TRANSPORT:
// pass for now
break;
case RDMA_RC_TRANSPORT:
switch(opcode) {
case READ:
blob = job->request_packet + sizeof(Common_Request);
key = std::string(get_key_ptr(blob));
key_size = get_key_size_from_blob(blob);
read_rpc(transport,
key.c_str(),
key_size);
break;
case WRITE:
blob = packet + sizeof(Common_Request);
key = std::string(get_key_ptr(blob));
key_size = get_key_size_from_blob(blob);
value = std::string(get_val_ptr(blob));
size_t value_size = get_val_size_from_start_ptr(get_val_ptr(blob));
write_rpc(transport,
key.c_str(),
key_size,
value.c_str(),
value_size);
break;
}
break;
}
switch(job_transport_type) {
case TCP_IP_TRANSPORT:
//ignore
break;
case RDMA_RC_TRANSPORT:
if(job->rdma_transport->get_mr_addr() != job->request_packet) {
free(job->request_packet);
}
}
delete(job);
job = NULL;
}
return NULL;
}
// Placeholder: nothing is appended. Two back-to-back timestamps are taken and,
// when `analyze` is set, their difference is recorded in append_log_time.
void append_to_log(char * buf, size_t buf_size) {
    //ignore
    const auto began = chrono::steady_clock::now();
    const auto finished = chrono::steady_clock::now();
    if(!analyze) {
        return;
    }
    append_log_time.push_back(finished - began);
}
// Stores the key/value pair in storage_log.
// Returns 0 on success, or -1 (after printing "Log full") when add_to_log
// reports failure. When `analyze` is set, the time spent in a successful
// append is recorded in append_log_time; failed appends are not timed.
int log_append(char* key, size_t key_size, char* value, size_t value_size) {
    const auto began = chrono::steady_clock::now();
    if(storage_log.add_to_log(key, key_size, value, value_size)) {
        printf("Log full\n");
        return -1;
    }
    if(analyze) {
        append_log_time.push_back(chrono::steady_clock::now() - began);
    }
    return 0;
}
// Placeholder handler: server-side TCP reads are not served; the job is
// ignored and success (0) is reported.
int tcp_server_read(job_context* job) {
    (void) job; //ignore the request
    return 0;
}
// Handles a client WRITE received over TCP.
//
// Steps:
//   1. Send a private copy of the request (retagged BACKUP_SERVICE) to every
//      replica returned by get_tcp_replicas().
//   2. Append the key/value pair to the local storage_log.
//   3. Wait for one response per replica in response_buffer, bounded by
//      worker_response_buffer_poll_timeout (a timeout terminates the process).
//   4. Send the client a STATUS_OK response if every replica acknowledged,
//      otherwise STATUS_INTERNAL_ERROR.
// Returns 0 when all replicas acknowledged, -1 otherwise.
//
// Fixes relative to the previous version:
//   - the poll loop tested the `response_buffer` pointer instead of
//     `response_buffer_pos`, so it never retried;
//   - `memset(ack, ...)` wrote through an uninitialised pointer;
//   - malloc() results were used unchecked;
//   - the success response never had its opcode/status set.
int tcp_server_write(job_context* job) {
    char* packet = job->request_packet;
    Common_Request *w = job->request;
    const size_t blob_size = w->req.w_request.length;
    const size_t final_size = sizeof(Common_Request) + blob_size;
    char* blob = packet + sizeof(Common_Request);
    w->service_type = BACKUP_SERVICE;
    bool flag = true;

    // Fan the request out; only replicas that were actually sent to are polled.
    std::vector<TCP_Transport*> replica_conns = get_tcp_replicas(w->req.w_request.rpcId);
    std::vector<TCP_Transport*> awaiting_ack;
    for(TCP_Transport *transport: replica_conns) {
        char *request_to_send = (char*) malloc(final_size);
        if(request_to_send == NULL) {
            printf("Out of memory while building replication request\n");
            flag = false;
            continue;
        }
        memcpy(request_to_send, w, final_size);
        ((Common_Request*)request_to_send)->service_type = BACKUP_SERVICE;
        job_context *replica_job = new job_context(transport, WRITE);
        replica_job->job_type = TYPE_REQUEST;
        replica_job->service_type = BACKUP_SERVICE;
        replica_job->request = (Common_Request*) request_to_send;
        replica_job->request_packet = request_to_send;
        replica_job->job_post_time = chrono::steady_clock::now();
        send_queue->enqueue(replica_job);
        awaiting_ack.push_back(transport);
    }

    // append to log
    char* key_ptr = get_key_ptr(blob);
    size_t key_size = get_key_size_from_start_ptr(key_ptr);
    char* value_ptr = get_val_ptr(blob);
    size_t value_size = get_val_size_from_start_ptr(value_ptr);
    if(storage_log.add_to_log(key_ptr, key_size, value_ptr, value_size)) {
        printf("Log full\n");
    }

    // get acks
    for(TCP_Transport *transport: awaiting_ack) {
        int response_buffer_pos = -1;
        const auto poll_start_time = chrono::steady_clock::now();
        auto poll_end_time = poll_start_time;
        do {
            response_buffer_pos = response_buffer->poll(transport->get_conn_fd());
            poll_end_time = chrono::steady_clock::now();
        } while(response_buffer_pos < 0 && (poll_end_time - poll_start_time) < worker_response_buffer_poll_timeout);
        if(response_buffer_pos < 0) {
            printf("Response Buffer timeout\n");
            //send Nack
            exit(-1);
        }
        Common_Request *replica_ack = response_buffer->buffer[response_buffer_pos].cr;
        if(replica_ack->opcode == STATUS_OK) {
            if(debug) {
                printf("Ack received\n");
            }
        }
        else {
            if(debug) {
                printf("Nack received\n");
            }
            flag = false;
        }
        response_buffer->erase_element(transport->get_conn_fd());
    }

    // Answer the client.
    Common_Request *ack = new Common_Request();
    ack->type = TYPE_RESPONSE;
    job_context *job_to_send = new job_context(job->tcp_transport, STATUS_OK);
    job_to_send->job_type = TYPE_RESPONSE;
    job_to_send->request = ack;
    job_to_send->request_packet = (char*) ack;
    if(!flag) {
        printf("All Acks not received\n");
        // send Nack
        ack->opcode = STATUS_INTERNAL_ERROR;
        ack->req.w_response.common.status = STATUS_INTERNAL_ERROR;
        job_to_send->opcode = STATUS_INTERNAL_ERROR;
        job_to_send->job_post_time = chrono::steady_clock::now();
        send_queue->enqueue(job_to_send);
        return -1;
    }
    ack->opcode = STATUS_OK;
    ack->req.w_response.common.status = STATUS_OK;
    job_to_send->job_post_time = chrono::steady_clock::now();
    send_queue->enqueue(job_to_send);
    return 0;
}
// Handles a READ received over TCP on the NIC side.
//
// The key follows the Common_Request header in the job's packet. If
// storage_log has no object for it, a READ response with status
// STATUS_INVALID_OBJECT and length 0 is queued and 0 is returned. Otherwise
// the value is copied behind a fresh response header (status STATUS_OK,
// length value_size + 1 for the trailing zero byte), queued for sending, and
// the function waits for the client's acknowledgement in response_buffer. A
// wait longer than worker_response_buffer_poll_timeout terminates the process.
// When `analyze` is set the total service time is recorded.
//
// Returns 0 normally, -1 if the response packet cannot be allocated.
int tcp_nic_read(job_context* job) {
    const auto start_time = chrono::steady_clock::now();
    char* key = job->request_packet + sizeof(Common_Request);
    struct Seglet* value_ptr = storage_log.check_log_for_obj(key);
    Common_Request *request_to_send;
    job_context *job_to_send;
    if(value_ptr == NULL) {
        // invalid request
        // send Nack
        request_to_send = new Common_Request();
        request_to_send->opcode = READ;
        request_to_send->type = TYPE_RESPONSE;
        request_to_send->req.r_response.common.status = STATUS_INVALID_OBJECT;
        request_to_send->req.r_response.length = 0;
        job_to_send = new job_context(job->tcp_transport, STATUS_INVALID_OBJECT);
        job_to_send->job_type = TYPE_RESPONSE;
        job_to_send->request = request_to_send;
        job_to_send->request_packet = (char*) request_to_send;
        job_to_send->job_post_time = chrono::steady_clock::now();
        send_queue->enqueue(job_to_send);
        return 0;
    }

    // valid case
    size_t value_size = get_val_size_from_start_ptr(value_ptr->value);
    size_t final_size = sizeof(Common_Request) + value_size + 1;
    char* packet_to_send = (char*) malloc(final_size);
    if(packet_to_send == NULL) {
        printf("Out of memory while building read response\n");
        return -1;
    }
    memset(packet_to_send, 0, final_size);
    memcpy(packet_to_send+sizeof(Common_Request), value_ptr->value, value_size);
    request_to_send = (Common_Request*) packet_to_send;
    request_to_send->opcode = READ;
    request_to_send->type = TYPE_RESPONSE;
    request_to_send->req.r_response.common.status = STATUS_OK;
    request_to_send->req.r_response.length = value_size+1;
    job_to_send = new job_context(job->tcp_transport, STATUS_OK);
    job_to_send->job_type = TYPE_RESPONSE;
    job_to_send->request = request_to_send;
    job_to_send->request_packet = packet_to_send;
    job_to_send->job_post_time = chrono::steady_clock::now();
    send_queue->enqueue(job_to_send);

    // wait for ack
    int response_buffer_pos = -1;
    const auto response_poll_start_time = chrono::steady_clock::now();
    auto response_poll_end_time = response_poll_start_time;
    do {
        response_buffer_pos = response_buffer->poll(job->tcp_transport->get_conn_fd());
        response_poll_end_time = chrono::steady_clock::now();
    } while(response_buffer_pos < 0 && (response_poll_end_time - response_poll_start_time) < worker_response_buffer_poll_timeout);
    // Same test as the loop condition, so no negative index can reach buffer[].
    if(response_buffer_pos < 0) {
        printf("Response not received\n");
        exit(-1);
    }
    Common_Request *ack = response_buffer->buffer[response_buffer_pos].cr;
    if(ack->opcode == STATUS_OK) {
        if(debug) printf("Ack received\n");
    }
    else {
        if(debug) printf("Nack received\n");
    }
    response_buffer->erase_element(job->tcp_transport->get_conn_fd());
    if(analyze) {
        const auto end_time = chrono::steady_clock::now();
        worker_read_service_time.push_back((end_time-start_time));
    }
    return 0;
}
// Handles a READ received over RDMA RC on the server side.
//
// THE CR STRUCTURE FOR READ IS
/*
-------------------------------------------
|       HEADER       |   KEY   |   NULL   |
|____________________|_________|__________|
*/
// The value found in storage_log is copied into the connection's memory
// region, a zero-length READ response with status STATUS_OK is queued to tell
// the client it may read the region, and the function waits for the client's
// acknowledgement (a wait longer than worker_response_buffer_poll_timeout
// terminates the process). When `analyze` is set the service time is recorded.
//
// Returns 0 on success; -1 when the key is unknown or the value does not fit
// in the memory region (no response is sent to the client in those cases).
//
// Fixes relative to the previous version: the value is taken from
// value_ptr->value, as the NIC-side read handlers do, instead of
// reinterpreting the Seglet object itself as the value bytes; the copy is
// bounded by the memory region size; and the response header is
// value-initialised so no indeterminate header fields are sent.
int rdma_rc_server_read(job_context* job) {
    const auto start_time = chrono::steady_clock::now();
    char* blob = job->request_packet + sizeof(Common_Request);
    char* key = get_key_ptr(blob);
    struct Seglet* value_ptr = storage_log.check_log_for_obj(key);
    if(value_ptr == NULL) {
        // invalid request
        return -1;
    }

    // valid case
    size_t value_size = get_val_size_from_start_ptr(value_ptr->value);
    if(value_size > static_cast<size_t>(job->rdma_transport->get_mr_size())) {
        printf("Value larger than memory region\n");
        return -1;
    }
    // copy value to MR
    memset(job->rdma_transport->get_mr_addr(), 0, job->rdma_transport->get_mr_size());
    memcpy(job->rdma_transport->get_mr_addr(), value_ptr->value, value_size);

    // send prep req to client to read MR
    Common_Request *prep_req = new Common_Request();
    prep_req->opcode = READ;
    prep_req->type = TYPE_RESPONSE;
    prep_req->req.r_response.common.status = STATUS_OK;
    prep_req->req.r_response.length = 0;
    job_context *job_to_send = new job_context(job->rdma_transport, STATUS_OK);
    job_to_send->job_type = TYPE_RESPONSE;
    job_to_send->request = prep_req;
    job_to_send->request_packet = (char*) prep_req;
    job_to_send->job_post_time = chrono::steady_clock::now();
    send_queue->enqueue(job_to_send);

    // wait for ack
    int response_buffer_pos = -1;
    const auto response_poll_start_time = chrono::steady_clock::now();
    auto response_poll_end_time = response_poll_start_time;
    do {
        response_buffer_pos = response_buffer->poll(job->rdma_transport->get_conn_fd());
        response_poll_end_time = chrono::steady_clock::now();
    } while(response_buffer_pos < 0 && (response_poll_end_time - response_poll_start_time) < worker_response_buffer_poll_timeout);
    // Same test as the loop condition, so no negative index can reach buffer[].
    if(response_buffer_pos < 0) {
        printf("Response buffer poll timeout\n");
        exit(-1);
    }
    Common_Request *ack = response_buffer->buffer[response_buffer_pos].cr;
    if(ack->opcode == STATUS_OK) {
        if(debug) printf("Ack received\n");
    }
    else {
        if(debug) printf("NAck Received\n");
    }
    response_buffer->erase_element(job->rdma_transport->get_conn_fd());
    if(analyze) {
        const auto end_time = chrono::steady_clock::now();
        worker_read_service_time.push_back((end_time - start_time));
    }
    return 0;
}
// Handles a client WRITE received over RDMA RC on the server side.
//
// Steps:
//   1. Retag the request header in place as a BACKUP_SERVICE write.
//   2. For every replica from get_rdma_replicas(): copy header + blob from
//      the source connection's memory region into the replica's region and
//      queue a zero-length "prep" WRITE request for it.
//   3. Append the key/value pair to the local log.
//   4. Wait for one response per replica in response_buffer (a wait longer
//      than worker_response_buffer_poll_timeout terminates the process).
//   5. If every replica acknowledged, queue a STATUS_OK response for the
//      client and return 0; otherwise return -1.
// NOTE(review): on the failure path no response is queued for the client
// (the original comment says "send err") -- confirm whether the client
// relies on a timeout here.
//
// Fixes relative to the previous version: each queued job now gets its own
// prep request object instead of all jobs sharing one heap object (which was
// also leaked when there were no replicas), and the region copy is skipped
// when source and destination are the same buffer.
// NOTE(review): who releases a queued request_packet is not visible here --
// confirm the sender's ownership rules.
int rdma_rc_server_write(job_context* job) {
    auto ack_start_time = chrono::steady_clock::now();
    char* packet = job->request_packet;
    Common_Request *w = (Common_Request*) packet;
    const size_t blob_size = w->req.w_request.length;
    char* blob = packet + sizeof(Common_Request);
    bool flag = true;

    //change required attrs in packet common request
    w->opcode = WRITE;
    w->type = TYPE_REQUEST;
    w->service_type = BACKUP_SERVICE;
    w->req.w_request.common.opcode = WRITE;
    w->req.w_request.common.service = BACKUP_SERVICE;

    //get replica connection objects
    std::vector<RDMA_Transport*> replica_conns = get_rdma_replicas(w->req.w_request.rpcId);

    // copy packet to mrs and send prep requests
    // packet already exists mr
    for(RDMA_Transport *transport: replica_conns) {
        if(transport->get_mr_addr() != job->rdma_transport->get_mr_addr()) {
            memcpy(transport->get_mr_addr(), job->rdma_transport->get_mr_addr(), blob_size + sizeof(Common_Request));
        }
        Common_Request *prep_request = new Common_Request();
        prep_request->opcode = WRITE;
        prep_request->req.w_request.length = 0;
        prep_request->req.w_request.common.opcode = WRITE;
        prep_request->service_type = BACKUP_SERVICE;
        prep_request->type = TYPE_REQUEST;
        job_context *prep_job = new job_context(transport, job->opcode);
        prep_job->job_type = TYPE_REQUEST;
        prep_job->request = prep_request;
        prep_job->request_packet = (char*) prep_request;
        prep_job->service_type = prep_request->service_type;
        prep_job->job_post_time = chrono::steady_clock::now();
        send_queue->enqueue(prep_job);
    }

    // append data to own log
    char* key_ptr = get_key_ptr(blob);
    size_t key_size = get_key_size_from_blob(blob);
    char* value_ptr = get_val_ptr(blob);
    size_t value_size = get_val_size_from_start_ptr(value_ptr);
    if(log_append(key_ptr, key_size, value_ptr, value_size)) {
        printf("Log full\n");
    }

    //get acks
    if(analyze) {
        ack_start_time = chrono::steady_clock::now();
    }
    for(RDMA_Transport* transport: replica_conns) {
        //need to poll response buffer
        int response_buffer_pos = -1;
        const auto poll_start_time = chrono::steady_clock::now();
        auto poll_end_time = poll_start_time;
        do {
            response_buffer_pos = response_buffer->poll(transport->get_conn_fd());
            poll_end_time = chrono::steady_clock::now();
        } while(response_buffer_pos < 0 && (poll_end_time - poll_start_time) < worker_response_buffer_poll_timeout);
        if(analyze) {
            response_buffer_wait_time.push_back(chrono::steady_clock::now() - job->job_post_time);
        }
        // Same test as the loop condition, so no negative index reaches buffer[].
        if(response_buffer_pos < 0) {
            printf("Response buffer poll timeout\n");
            exit(-1);
        }
        Common_Request *replica_ack = response_buffer->buffer[response_buffer_pos].cr;
        if(replica_ack->opcode == STATUS_OK) {
            if(debug) printf("Ack received\n");
        }
        else {
            if(debug) printf("NAck Received\n");
            flag = false;
        }
        response_buffer->erase_element(transport->get_conn_fd());
    }
    if(analyze) {
        const auto ack_end_time = chrono::steady_clock::now();
        master_backup_ack_time.push_back(ack_end_time - ack_start_time);
    }

    if(!flag) {
        //send err
        printf("Error in receiving ack\n");
        //delete(w);
        return -1;
    }

    Common_Request *ack = new Common_Request();
    ack->opcode = STATUS_OK;
    ack->req.w_request.length = 0;
    ack->req.w_response.common.status = STATUS_OK;
    ack->type = TYPE_RESPONSE;
    job_context *job_to_send = new job_context(job->rdma_transport, STATUS_OK);
    job_to_send->job_type = TYPE_RESPONSE;
    job_to_send->request = ack;
    job_to_send->request_packet = (char*) ack;
    job_to_send->job_post_time = chrono::steady_clock::now();
    send_queue->enqueue(job_to_send);
    return 0;
}
// Handles a READ received over RDMA RC on the NIC side.
//
// THE CR STRUCTURE FOR READ IS
/*
-------------------------------------------
|       HEADER       |   KEY   |   NULL   |
|____________________|_________|__________|
*/
// If storage_log has no object for the key, or the stored value does not fit
// in the connection's memory region, a zero-length READ response carrying an
// error status (STATUS_INVALID_OBJECT or STATUS_INTERNAL_ERROR respectively)
// is queued and -1 is returned. Otherwise the value is copied into the
// memory region, a zero-length STATUS_OK response is queued to tell the
// client it may read the region, and the function waits for the client's
// acknowledgement (a wait longer than worker_response_buffer_poll_timeout
// terminates the process). When `analyze` is set the service time is recorded.
//
// Fixes relative to the previous version: the copy into the memory region is
// bounded by its size, and the success response header is value-initialised
// so no indeterminate header fields are sent.
// NOTE(review): the error response job is still created with opcode
// STATUS_OK (tcp_nic_read uses STATUS_INVALID_OBJECT) -- confirm which one
// the sender expects.
int rdma_rc_nic_read(job_context* job) {
    const auto start_time = chrono::steady_clock::now();
    char* blob = job->request_packet + sizeof(Common_Request);
    char* key = get_key_ptr(blob);
    struct Seglet* value_ptr = storage_log.check_log_for_obj(key);
    Common_Request *prep_req;
    job_context *job_to_send;

    size_t value_size = 0;
    bool too_large = false;
    if(value_ptr != NULL) {
        value_size = get_val_size_from_start_ptr(value_ptr->value);
        too_large = value_size > static_cast<size_t>(job->rdma_transport->get_mr_size());
    }

    if(value_ptr == NULL || too_large) {
        // invalid request
        // here instead of prep request, send error req
        prep_req = new Common_Request();
        prep_req->opcode = READ;
        prep_req->type = TYPE_RESPONSE;
        if(too_large) {
            printf("Value larger than memory region\n");
            prep_req->req.r_response.common.status = STATUS_INTERNAL_ERROR;
        }
        else {
            prep_req->req.r_response.common.status = STATUS_INVALID_OBJECT;
        }
        prep_req->req.r_response.length = 0;
        job_to_send = new job_context(job->rdma_transport, STATUS_OK);
        job_to_send->job_type = TYPE_RESPONSE;
        job_to_send->request = prep_req;
        job_to_send->request_packet = (char*) prep_req;
        job_to_send->job_post_time = chrono::steady_clock::now();
        send_queue->enqueue(job_to_send);
        return -1;
    }

    // valid case
    // copy value to MR
    memset(job->rdma_transport->get_mr_addr(), 0, job->rdma_transport->get_mr_size());
    memcpy(job->rdma_transport->get_mr_addr(), value_ptr->value, value_size);

    // send prep req to client to read MR
    prep_req = new Common_Request();
    prep_req->opcode = READ;
    prep_req->type = TYPE_RESPONSE;
    prep_req->req.r_response.common.status = STATUS_OK;
    prep_req->req.r_response.length = 0;
    job_to_send = new job_context(job->rdma_transport, STATUS_OK);
    job_to_send->job_type = TYPE_RESPONSE;
    job_to_send->request = prep_req;
    job_to_send->request_packet = (char*) prep_req;
    job_to_send->job_post_time = chrono::steady_clock::now();
    send_queue->enqueue(job_to_send);

    // wait for ack
    int response_buffer_pos = -1;
    const auto response_poll_start_time = chrono::steady_clock::now();
    auto response_poll_end_time = response_poll_start_time;
    do {
        response_buffer_pos = response_buffer->poll(job->rdma_transport->get_conn_fd());
        response_poll_end_time = chrono::steady_clock::now();
    } while(response_buffer_pos < 0 && (response_poll_end_time - response_poll_start_time) < worker_response_buffer_poll_timeout);
    // Same test as the loop condition, so no negative index can reach buffer[].
    if(response_buffer_pos < 0) {
        printf("Response buffer poll timeout\n");
        exit(-1);
    }
    Common_Request *ack = response_buffer->buffer[response_buffer_pos].cr;
    if(ack->opcode == STATUS_OK) {
        if(debug) printf("Ack received\n");
    }
    else {
        if(debug) printf("NAck Received\n");
    }
    response_buffer->erase_element(job->rdma_transport->get_conn_fd());
    if(analyze) {
        const auto end_time = chrono::steady_clock::now();
        worker_read_service_time.push_back((end_time - start_time));
    }
    return 0;
}
std::vector<TCP_Transport*> get_tcp_replicas(uint64_t key) {
std::set<TCP_Transport*> in_transports;
uint32_t hash_val = hash(key, num_servers);
in_transports.insert(common::tcp_transport_map[hash_val]);
size_t p_size;
size_t a_size;
for(int i=0, c=0; i<num_servers && c<num_replicas; i++) {
p_size = in_transports.size();
in_transports.insert(common::tcp_transport_map[i]);
a_size = in_transports.size();
if(p_size!=a_size) c++;
}
std::vector<TCP_Transport*> ret_transports(in_transports.begin(), in_transports.end());
return ret_transports;
}
std::vector<RDMA_Transport*> get_rdma_replicas(uint64_t key) {
std::set<RDMA_Transport*> in_transports;
uint32_t hash_val = hash(key, num_servers);
in_transports.insert(common::rdma_transport_map[hash_val]);
size_t p_size;
size_t a_size;
for(int i=0, c=0; i<num_servers && c<num_replicas; i++) {
p_size = in_transports.size();
in_transports.insert(common::rdma_transport_map[i]);
a_size = in_transports.size();
if(p_size!=a_size) c++;
}
std::vector<RDMA_Transport*> ret_transports(in_transports.begin(), in_transports.end());
return ret_transports;
}
// Serves a replication request that arrived over TCP.
//
// The payload that follows the Common_Request header in job->request_packet
// is appended to the log, then a STATUS_OK acknowledgement is sent back on
// job->tcp_transport.
//
// Returns 0 when the acknowledgement was sent, -1 when send_data() reported
// an error.
//
// NOTE(review): the transport's `mr` still points at the acknowledgement
// after it has been deleted; callers must re-point it before the next send.
int tcp_server_replicate(job_context* job) {
    char* raw_request = job->request_packet;
    Common_Request* header = (Common_Request*) raw_request;
    char* payload = raw_request + sizeof(struct Common_Request);
    append_to_log(payload, header->req.w_request.length);

    // Build the acknowledgement and hand it to the transport as its send buffer.
    TCP_Transport* conn = job->tcp_transport;
    Common_Request* reply = new Common_Request();
    reply->opcode = STATUS_OK;
    reply->req.w_response.common.status = STATUS_OK;
    conn->mr = (char*) reply;
    conn->mr_size = sizeof(struct Common_Request);

    int result = 0;
    if (conn->send_data()) {
        // send_data() returns non-zero on failure
        result = -1;
    }
    delete(reply);
    return result;
}
// Serves a replication request on an RDMA reliable-connection transport.
//
// Steps:
//   1. Issue a one-sided read on the job's RDMA transport; the request is
//      then parsed out of that transport's memory region.
//   2. Extract key and value from the payload after the Common_Request header
//      and append them to the log.
//   3. Enqueue a STATUS_OK response on the global send queue.
//
// Returns -1 when the one-sided read fails, otherwise 0.
//
// NOTE(review): a failing log_append() only prints "Log full"; the STATUS_OK
// acknowledgement is still sent. Confirm this is intended.
int rdma_rc_server_replicate(job_context* job) {
    auto service_begin = chrono::steady_clock::now();
    if (analyze) {
        service_begin = chrono::steady_clock::now();
    }

    RDMA_Transport* conn = job->rdma_transport;
    if (conn->one_sided_read()) {
        perror("Unable to read replicate request");
        return -1;
    }

    // The request now sits in the memory region: header first, key/value blob after it.
    char* mr_base = conn->get_mr_addr();
    char* kv_blob = mr_base + sizeof(struct Common_Request);

    char* key_ptr = get_key_ptr(kv_blob);
    size_t key_size = get_key_size_from_blob(kv_blob);
    char* value_ptr = get_val_ptr(kv_blob);
    size_t value_size = get_val_size_from_start_ptr(value_ptr);
    if (log_append(key_ptr, key_size, value_ptr, value_size)) {
        printf("Log full\n");
    }

    // Kept from the original flow; the TCP side channel is not used below.
    TCP_Transport* tcp_side = job->rdma_transport->get_tcp_conn();
    (void) tcp_side;

    // Acknowledgement header. The assignment order is kept because `req` may
    // hold overlapping request/response layouts.
    Common_Request* reply = new Common_Request();
    reply->opcode = STATUS_OK;
    reply->req.w_request.length = 0;
    reply->req.w_response.common.status = STATUS_OK;
    reply->type = TYPE_RESPONSE;

    job_context* reply_job = new job_context(job->rdma_transport, STATUS_OK);
    reply_job->job_type = TYPE_RESPONSE;
    reply_job->opcode = STATUS_OK;
    reply_job->request = reply;
    reply_job->request_packet = (char*) reply;
    reply_job->job_post_time = chrono::steady_clock::now();
    send_queue->enqueue(reply_job);

    if (analyze) {
        auto service_end = chrono::steady_clock::now();
        worker_write_service_time.push_back(service_end - service_begin);
    }
    return 0;
}
// Handles a client write on the NIC over TCP: forwards the request to every
// replica chosen by get_tcp_replicas(), collects their acknowledgements and
// reports the overall outcome to the client.
//
// job->request_packet holds a Common_Request header immediately followed by
// `req.w_request.length` bytes of payload. The header's service_type is
// rewritten to BACKUP_SERVICE before forwarding.
//
// Returns 0 when every replica acknowledged with STATUS_OK and the reply to
// the client was sent; -1 otherwise. On replica failure the client receives
// STATUS_INTERNAL_ERROR.
int tcp_nic_write(job_context* job) {
    // TCP needs no separate "prep" request; the packet itself is forwarded.
    char* packet = job->request_packet;
    Common_Request* w = (Common_Request*) packet;
    size_t blob_size = w->req.w_request.length;
    bool all_ok = true;

    std::vector<TCP_Transport*> replica_conns = get_tcp_replicas(w->req.w_request.rpcId);
    w->service_type = BACKUP_SERVICE;

    // Forward the request. Only replicas that accepted the send are polled
    // for an acknowledgement; waiting on the others could block forever.
    std::vector<TCP_Transport*> awaiting_ack;
    awaiting_ack.reserve(replica_conns.size());
    for (TCP_Transport* transport : replica_conns) {
        transport->mr = packet;
        transport->mr_size = sizeof(Common_Request) + blob_size;
        if (transport->send_data()) {
            // send_data() returns non-zero on failure
            all_ok = false;
        } else {
            awaiting_ack.push_back(transport);
        }
    }

    Common_Request ack;
    for (TCP_Transport* transport : awaiting_ack) {
        // Pre-load a failure code so that a recv_data() that writes nothing
        // is not mistaken for success (STATUS_OK) or for a stale earlier ack.
        memset(&ack, 0, sizeof(ack));
        ack.opcode = STATUS_INTERNAL_ERROR;
        transport->recv_data((char*)&ack, sizeof(ack));
        if (ack.opcode != STATUS_OK) {
            all_ok = false;
        }
    }

    // Reuse the buffer for the reply to the client.
    memset(&ack, 0, sizeof(Common_Request));
    if (all_ok) {
        ack.opcode = STATUS_OK;
        ack.req.w_response.common.status = STATUS_OK;
    }
    else {
        ack.opcode = STATUS_INTERNAL_ERROR;
        ack.req.w_response.common.status = STATUS_INTERNAL_ERROR;
    }
    TCP_Transport* client_conn = job->tcp_transport;
    client_conn->mr = (char*)&ack;
    client_conn->mr_size = sizeof(ack);
    if (client_conn->send_data()) {
        return -1;
    }
    return all_ok ? 0 : -1;
}
// Handles a client write on the NIC over RDMA reliable connections.
//
// Flow:
//   1. Rewrite the request header so replicas treat it as a backup WRITE.
//   2. For every replica from get_rdma_replicas(): copy the request from the
//      client's memory region into the replica's memory region and enqueue a
//      zero-length "prep" request on the send queue.
//   3. Append the key/value pair to the local log.
//   4. Poll the response buffer for each replica's acknowledgement; the
//      process exits if a poll exceeds worker_response_buffer_poll_timeout.
//   5. Enqueue the response for the client: STATUS_OK when every replica
//      acknowledged, STATUS_INTERNAL_ERROR otherwise.
//
// Returns 0 on success, -1 when at least one replica did not acknowledge
// with STATUS_OK.
int rdma_rc_nic_write(job_context* job) {
    auto ack_start_time = chrono::steady_clock::now();
    char* packet = job->request_packet;
    Common_Request* w = (Common_Request*) packet;
    size_t blob_size = w->req.w_request.length;
    char* blob = packet + sizeof(Common_Request);

    // Zero-length header announcing the write; the same object is attached to
    // every replica's send job.
    Common_Request* prep_request = new Common_Request();
    prep_request->opcode = WRITE;
    prep_request->req.w_request.length = 0;
    prep_request->req.w_request.common.opcode = WRITE;
    prep_request->service_type = BACKUP_SERVICE;
    prep_request->type = TYPE_REQUEST;

    // Re-label the client's request as a backup write before it is copied out.
    w->opcode = WRITE;
    w->type = TYPE_REQUEST;
    w->service_type = BACKUP_SERVICE;
    w->req.w_request.common.opcode = WRITE;
    w->req.w_request.common.service = BACKUP_SERVICE;

    std::vector<RDMA_Transport*> replica_conns = get_rdma_replicas(w->req.w_request.rpcId);

    // Stage the request in each replica's memory region and queue the prep request.
    for (RDMA_Transport* transport : replica_conns) {
        memcpy(transport->get_mr_addr(), job->rdma_transport->get_mr_addr(), blob_size + sizeof(Common_Request));
        job_context* prep_job = new job_context(transport, job->opcode);
        prep_job->job_type = TYPE_REQUEST;
        prep_job->request = prep_request;
        prep_job->request_packet = (char*) prep_request;
        prep_job->service_type = prep_request->service_type;
        prep_job->job_post_time = chrono::steady_clock::now();
        send_queue->enqueue(prep_job);
    }
    if (replica_conns.empty()) {
        // No send job took the prep request, so nothing else can release it.
        delete prep_request;
    }

    // Append to the local log.
    char* key_ptr = get_key_ptr(blob);
    size_t key_size = get_key_size_from_blob(blob);
    char* value_ptr = get_val_ptr(blob);
    size_t value_size = get_val_size_from_start_ptr(value_ptr);
    if (log_append(key_ptr, key_size, value_ptr, value_size)) {
        printf("Log full\n");
    }

    // Collect one acknowledgement per replica from the response buffer.
    if (analyze) {
        ack_start_time = chrono::steady_clock::now();
    }
    bool all_acked = true;
    for (RDMA_Transport* transport : replica_conns) {
        int response_buffer_pos = -1;
        auto poll_start_time = chrono::steady_clock::now();
        auto poll_end_time = poll_start_time;
        do {
            response_buffer_pos = response_buffer->poll(transport->get_conn_fd());
            poll_end_time = chrono::steady_clock::now();
        } while (response_buffer_pos < 0 && (poll_end_time - poll_start_time) < worker_response_buffer_poll_timeout);
        if (analyze) {
            response_buffer_wait_time.push_back(chrono::steady_clock::now() - job->job_post_time);
        }
        if (response_buffer_pos == -1) {
            printf("Response buffer poll timeout\n");
            exit(-1);
        }
        Common_Request* replica_ack = response_buffer->buffer[response_buffer_pos].cr;
        if (replica_ack->opcode == STATUS_OK) {
            if (debug) printf("Ack received\n");
        }
        else {
            if (debug) printf("NAck Received\n");
            all_acked = false;
        }
        response_buffer->erase_element(transport->get_conn_fd());
    }
    if (analyze) {
        auto ack_end_time = chrono::steady_clock::now();
        master_backup_ack_time.push_back(ack_end_time - ack_start_time);
    }

    if (!all_acked) {
        printf("Error in receiving ack\n");
    }

    // Always answer the client. Previously the failure path returned without
    // a response, leaving the client to wait for its own timeout.
    // The assignment order is kept because `req` may hold overlapping
    // request/response layouts.
    Common_Request* reply = new Common_Request();
    if (all_acked) {
        reply->opcode = STATUS_OK;
        reply->req.w_request.length = 0;
        reply->req.w_response.common.status = STATUS_OK;
    }
    else {
        reply->opcode = STATUS_INTERNAL_ERROR;
        reply->req.w_request.length = 0;
        reply->req.w_response.common.status = STATUS_INTERNAL_ERROR;
    }
    reply->type = TYPE_RESPONSE;
    reply->request_start_time = job->request->request_start_time;

    job_context* reply_job = new job_context(job->rdma_transport, STATUS_OK);
    reply_job->job_type = TYPE_RESPONSE;
    reply_job->request = reply;
    reply_job->request_packet = (char*) reply;
    reply_job->job_post_time = chrono::steady_clock::now();
    send_queue->enqueue(reply_job);

    return all_acked ? 0 : -1;
}
#endif
\ No newline at end of file
#ifndef __THREAD_FUNCTIONS_H__
#define __THREAD_FUNCTIONS_H__
#include <vector>
#include "threadsafe_queue.hpp"
#include "../transport_api/transport_config.hpp"
// Thread entry points. Their `void* f(void*)` shape matches the function
// pointer type stored by Thread_Pool.
//void thread_function(void);
void* thread_function(void*);
void* worker_function(void*);
void* client_worker_function(void* transport_ptr);
// Per-transport request handlers. Each takes the job to serve and returns an
// int status; the handlers defined alongside these declarations return 0 on
// success and -1 on failure.
// NOTE(review): the read/write handlers are defined outside this view;
// presumably they follow the same 0 / -1 convention — confirm.
int tcp_server_read(job_context *job);
int tcp_server_write(job_context *job);
// Appends the request payload to the log and sends a STATUS_OK
// acknowledgement over the job's TCP transport.
int tcp_server_replicate(job_context *job);
int tcp_nic_read(job_context *job);
// Forwards a client write to the TCP replicas, gathers their
// acknowledgements and reports the outcome to the client.
int tcp_nic_write(job_context *job);
int rdma_rc_server_read(job_context *job);
int rdma_rc_server_write(job_context *job);
// Performs a one-sided read of the request, appends its key/value pair to
// the log and enqueues a STATUS_OK response on the send queue.
int rdma_rc_server_replicate(job_context *job);
int rdma_rc_nic_read(job_context *job);
// Stages a client write in every replica's memory region, appends it to the
// local log, waits for the replicas' acknowledgements and enqueues the
// response for the client.
int rdma_rc_nic_write(job_context *job);
// Log helpers. Callers treat a non-zero return from log_append() as
// "log full".
void append_to_log(char *buf, size_t buf_size);
int log_append(char* key, size_t key_size, char* value, size_t value_size);
// Replica selection: the transport at hash(key, num_servers) plus up to
// num_replicas further distinct transports, returned in pointer order.
std::vector<TCP_Transport*> get_tcp_replicas(uint64_t key);
std::vector<RDMA_Transport*> get_rdma_replicas(uint64_t key);
#endif
\ No newline at end of file
#ifndef __THREAD_POOL_CC__
#define __THREAD_POOL_CC__
#include <vector>
#include <string>
#include <thread>
#include "thread_pool.hpp"
// Creates a pool of `n` threads with its own, newly allocated job queue.
// No entry point is set; call set_function() before start_threads().
// NOTE(review): the queue allocated here is never released by ~Thread_Pool.
Thread_Pool::Thread_Pool(size_t n)
    : Thread_Pool(new Thread_Safe_Queue(), n, nullptr, nullptr) {
}

// Creates an empty pool (zero threads, no entry point) bound to queue `q`.
Thread_Pool::Thread_Pool(Thread_Safe_Queue* q)
    : Thread_Pool(q, 0, nullptr, nullptr) {
}

// Creates a pool of `n` threads bound to queue `q`; no entry point is set.
Thread_Pool::Thread_Pool(Thread_Safe_Queue* q, size_t n)
    : Thread_Pool(q, n, nullptr, nullptr) {
}

// Creates a pool of `n` threads bound to queue `q` that will run `f` with a
// null argument.
Thread_Pool::Thread_Pool(Thread_Safe_Queue* q, size_t n, void* (*f)(void*))
    : Thread_Pool(q, n, f, nullptr) {
}

// Creates a pool of `n` threads bound to queue `q` that will run `f(args)`.
// Every other constructor delegates here so that all members, including
// `args` and `_funtion`, always start with a defined value.
Thread_Pool::Thread_Pool(Thread_Safe_Queue* q, size_t n, void* (*f)(void*), void* args) {
    this->num_threads = n;
    this->__function = f;
    this->_funtion = nullptr;
    this->args = args;
    this->job_queue = q;
}
// Waits for every thread that was actually started.
//
// The loop walks thread_arr itself rather than counting to num_threads, so
// it is safe when start_threads() was never called or when num_threads was
// changed after the threads were launched.
Thread_Pool::~Thread_Pool() {
    for (std::thread& worker : this->thread_arr) {
        if (worker.joinable()) {
            worker.join();
        }
    }
}
void Thread_Pool::set_num_threads(size_t n) {
this->num_threads = n;
}
void Thread_Pool::set_function(void* (*f)(void*)) {
this->__function = f;
}
void Thread_Pool::set_function(void (*f) (void)) {
this->_funtion = f;
}
// Sets the opaque argument that is passed to the entry point of every
// thread launched by start_threads().
void Thread_Pool::set_args(void* args) {
    // `this->` is required: the parameter shadows the member of the same name.
    this->args = args;
}
// Launches `num_threads` worker threads, each running __function(args).
//
// Does nothing when no entry point has been set: starting a std::thread on a
// null function pointer would crash as soon as the thread runs.
void Thread_Pool::start_threads() {
    if (this->__function == nullptr) {
        return;
    }
    this->thread_arr.reserve(this->thread_arr.size() + this->num_threads);
    for (size_t i = 0; i < this->num_threads; ++i) {
        this->thread_arr.emplace_back(this->__function, this->args);
    }
}
#endif
\ No newline at end of file
#ifndef __THREAD_POOL_H__
#define __THREAD_POOL_H__
#include <vector>
#include <string>
#include <infiniband/verbs.h>
#include <pthread.h>
#include <thread>
#include "threadsafe_queue.hpp"
// A fixed-size group of std::threads that all run the same entry point.
//
// Typical use: construct (optionally with queue, size, entry point and
// argument), adjust with the setters, then call start_threads(). The
// destructor joins the threads.
//
// Every member has a default initializer so that constructors which do not
// assign a member still leave it with a defined value.
class Thread_Pool {
protected:
    size_t num_threads = 0;                  // threads to launch in start_threads()
    void* (*__function) (void*) = nullptr;   // entry point run by start_threads()
    void (*_funtion) (void) = nullptr;       // argument-less entry point (stored only)
    void* args = nullptr;                    // argument handed to __function
    std::vector<std::thread> thread_arr;     // threads launched so far
    Thread_Safe_Queue* job_queue = nullptr;  // queue associated with this pool
public:
    Thread_Pool(size_t n);
    Thread_Pool(Thread_Safe_Queue* q);
    Thread_Pool(Thread_Safe_Queue* q, size_t n);
    Thread_Pool(Thread_Safe_Queue* q, size_t n, void* (*f)(void*));
    Thread_Pool(Thread_Safe_Queue* q, size_t n, void* (*f)(void*), void* args);
    ~Thread_Pool();
    void set_num_threads(size_t n);
    void set_function(void* (*f)(void*));
    void set_function(void (*f) (void));
    void set_args(void* args);
    void start_threads();
};
#endif
\ No newline at end of file
#ifndef __THREADSAFE_QUEUE_C__
#define __THREADSAFE_QUEUE_C__
#include <queue>
#include <pthread.h>
#include "queue_context.hpp"
#include "threadsafe_queue.hpp"
// Creates an empty queue and initialises its mutex with default attributes.
Thread_Safe_Queue::Thread_Safe_Queue() {
    queue_size = 0;
    pthread_mutex_init(&queue_lock, NULL);
}
// Creates a queue pre-filled with the first `n` entries of `jobs`, in order.
// The default constructor runs first and sets up the mutex.
Thread_Safe_Queue::Thread_Safe_Queue(size_t n, job_context* jobs[]) : Thread_Safe_Queue() {
    size_t idx = 0;
    while (idx < n) {
        q.push(jobs[idx]);
        ++idx;
    }
    queue_size = n;
}
// Deletes every job still queued, then destroys the mutex.
//
// The destructor no longer unlocks the mutex first: this thread does not
// hold it, and unlocking a mutex one does not own is undefined for a default
// pthread mutex. The mutex is destroyed last so it stays valid for as long
// as the queue contents are being touched.
Thread_Safe_Queue::~Thread_Safe_Queue() {
    while (!this->q.empty()) {
        job_context* leftover = this->q.front();
        this->q.pop();
        delete(leftover);
    }
    this->queue_size = 0;
    pthread_mutex_destroy(&(this->queue_lock));
}
// Appends `job` to the back of the queue under the queue lock and refreshes
// the cached size.
void Thread_Safe_Queue::enqueue(job_context* job) {
    pthread_mutex_lock(&queue_lock);
    q.push(job);
    queue_size = q.size();
    pthread_mutex_unlock(&queue_lock);
}
// Wraps `transport` and `opcode` in a new job_context and appends it to the
// queue.
//
// The job is allocated before the lock is taken: the critical section stays
// short, and an exception from `new` can no longer leave the mutex locked.
void Thread_Safe_Queue::enqueue(TCP_Transport *transport, uint8_t opcode) {
    job_context* job = new job_context(transport, opcode);
    this->enqueue(job);
}
// Wraps `transport` and `opcode` in a new job_context and appends it to the
// queue.
//
// The job is allocated before the lock is taken: the critical section stays
// short, and an exception from `new` can no longer leave the mutex locked.
void Thread_Safe_Queue::enqueue(RDMA_Transport *transport, uint8_t opcode) {
    job_context* job = new job_context(transport, opcode);
    this->enqueue(job);
}
// Removes the job at the head of the queue without returning or deleting it.
// Does nothing when the queue is empty; popping an empty std::queue is
// undefined behaviour.
void Thread_Safe_Queue::dequeue() {
    pthread_mutex_lock(&(this->queue_lock));
    if (!this->q.empty()) {
        this->q.pop();
    }
    this->queue_size = this->q.size();
    pthread_mutex_unlock(&(this->queue_lock));
}
// Returns the job at the head of the queue without removing it, or NULL when
// the queue is empty.
//
// The lookup runs under the queue lock, as the other accessors do, and the
// empty case is handled explicitly because front() on an empty std::queue is
// undefined behaviour. Do not call this while already holding queue_lock.
job_context* Thread_Safe_Queue::front() {
    job_context* head = NULL;
    pthread_mutex_lock(&(this->queue_lock));
    if (!this->q.empty()) {
        head = this->q.front();
    }
    pthread_mutex_unlock(&(this->queue_lock));
    return head;
}
// Removes and returns the job at the head of the queue; the caller takes
// ownership. Returns NULL when the queue is empty.
//
// A null entry at the head is popped and returned as NULL. Previously it was
// left in place, so every later call hit the same entry and the queue could
// never advance.
job_context* Thread_Safe_Queue::get_job() {
    job_context* next = NULL;
    pthread_mutex_lock(&(this->queue_lock));
    if (!this->q.empty()) {
        next = this->q.front();
        this->q.pop();
        this->queue_size = this->q.size();
    }
    pthread_mutex_unlock(&(this->queue_lock));
    return next;
}
#endif
\ No newline at end of file
#ifndef __THREADSAFE_QUEUE_H__
#define __THREADSAFE_QUEUE_H__
#include <queue>
#include <pthread.h>
#include <infiniband/verbs.h>
#include "queue_context.hpp"
#include "../transport_api/transport_config.hpp"
// FIFO of job_context pointers guarded by a pthread mutex.
//
// enqueue(), dequeue() and get_job() take queue_lock around every access to
// `q`. All members are public, so code that touches `q` directly bypasses
// that locking.
//
// The destructor deletes every job still in the queue, so queued jobs are
// owned by the queue until get_job() hands them out.
//
// NOTE(review): the class holds a pthread_mutex_t and owning pointers but
// does not disable copying; copying an instance would presumably be unsafe —
// confirm nothing copies it.
class Thread_Safe_Queue {
public:
// Underlying container; jobs are pushed at the back and taken from the front.
std::queue<struct job_context*> q;
// Mutex protecting `q` and `queue_size`.
pthread_mutex_t queue_lock;
// Cached element count, refreshed by the locked mutators.
size_t queue_size;
// Creates an empty queue and initialises the mutex.
Thread_Safe_Queue();
// Creates a queue pre-filled with the first n entries of `jobs`.
Thread_Safe_Queue(size_t n, job_context* jobs[]);
~Thread_Safe_Queue();
// Appends an existing job.
void enqueue(job_context* job);
// Allocates a job for (transport, opcode) and appends it.
void enqueue(TCP_Transport *transport, uint8_t opcode);
void enqueue(RDMA_Transport *transport, uint8_t opcode);
// Removes the head job without returning or deleting it.
void dequeue();
// Returns the head job without removing it.
job_context* front();
// Removes and returns the head job, or NULL when the queue is empty.
job_context* get_job();
};
#endif
\ No newline at end of file
#include <iostream>
#include <stdio.h>
#include <string>
#include <vector>
#include "include/cli_api.hpp"
#include "include/common.hpp"
#include "include/client_functions.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using namespace std;
// Addresses this client connects to (currently only the NIC).
vector<string> conn_addrs = {
    machine_allocation_ips["ub-04-nic"] //the nic
};
int conn_port = 8888;
enum Entity self_id = CLIENT;

// Client entry point.
//
// Reads config/client_config.conf, connects to every address in conn_addrs
// and then either
//   * interactive mode: reads up to 10 commands from stdin and issues each
//     through the dispatcher, stopping early at end of input, or
//   * benchmark mode: issues max_req write RPCs with a 64-byte key and a
//     1024-byte value.
// With `analyze` enabled the collected timing samples are averaged and
// printed before exit.
int main() {
    char* ipstr = (char*) malloc(max_ip_cmd_len);
    if(ipstr == NULL) {
        perror("malloc");
        return 1;
    }
    vector<string> tokens;
    Params parameters("config/client_config.conf");
    parameters.read_params();
    debug = parameters.debug;
    analyze = parameters.analyze;
    max_packet_size_bytes = parameters.max_packet_size_bytes;
    max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
    conn_port = parameters.conn_port;
    if(debug) {
        parameters.print_vals();
    }
    Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
    for(string conn_addr: conn_addrs) {
        // Connect to the address being iterated, not always the first entry.
        dispatcher.add_connection(conn_addr, conn_port, parameters);
        if(debug) {
            dispatcher.conn_pool->rdma_connection_pool.back()->check_rdma_onesided();
        }
    }
    if(debug) {
        printf("Connected\n");
    }
    if(parameters.interactive_mode) {
        for(int i=0; i<10; i++) {
            memset(ipstr, 0, max_ip_cmd_len);
            if(fgets(ipstr, max_ip_cmd_len, stdin) == NULL) {
                // End of input or read error: stop issuing commands.
                break;
            }
            if(debug) {
                printf("Entered Command: %s\n", ipstr);
            }
            tokens = tokenize(string(ipstr));
            if(debug) {
                printf("Extracted tokens are:\n");
                // The "cmd arg1 arg2" summary is printed only when all three
                // tokens exist; indexing a shorter vector is out of bounds.
                if(tokens.size() >= 3) {
                    printf("%s\t\t%s\t\t%s\n",tokens[0].c_str(), tokens[1].c_str(), tokens[2].c_str());
                }
                for(size_t t=0; t<tokens.size(); t++) {
                    printf("%s\t", tokens[t].c_str());
                }
                printf("\n");
            }
            dispatcher.issue_cmd(tokens);
        }
    }
    else {
        const size_t value_len = 1024;
        string s;
        for(int i=0; i<parameters.rdma_mr_size_bytes-150; i++) s.push_back((char)(i%256));
        // write_rpc reads value_len bytes from the buffer; pad it so the read
        // never runs past the end of the string when the memory region is small.
        if(s.size() < value_len) {
            s.resize(value_len, '\0');
        }
        for(int i=0; i<max_req; i++) {
            write_rpc(dispatcher.conn_pool->rdma_connection_pool[0],
                "asddfgdsdhaskjdhaskjdhsakjdhskjdhaskjhdaskjdashdkjashdkasjhdhhhh", 64, s.c_str(), value_len);
            printf("Completed request no. %d\n", i);
        }
    }
    if(analyze) {
        for(chrono::duration<double> d: request_queue_wait_time) {
            request_queue_wait_time_sum += d.count();
        }
        for(chrono::duration<double> d: response_buffer_wait_time) {
            response_buffer_wait_time_sum += d.count();
        }
        for(chrono::duration<double> d: send_queue_wait_time) {
            send_queue_wait_time_sum += d.count();
        }
        for(chrono::duration<double> d: client_rtt_time) {
            client_rtt_time_sum += d.count();
        }
        for(chrono::duration<double> d: rdma_one_sided_read_time) {
            rdma_one_sided_read_time_sum += d.count();
        }
        for(chrono::duration<double> d: rdma_one_sided_write_time) {
            rdma_one_sided_write_time_sum += d.count();
        }
        cout<<"Request Queue Wait Time Avg: "<<request_queue_wait_time_sum/request_queue_wait_time.size()<<" seconds"<<endl;
        cout<<"Response Buffer Wait Time Avg: "<<response_buffer_wait_time_sum/response_buffer_wait_time.size()<<" seconds"<<endl;
        cout<<"Send Queue Wait Time Avg: "<<send_queue_wait_time_sum/send_queue_wait_time.size()<<" seconds"<<endl;
        cout<<"Avg RDMA one sided read time: "<<rdma_one_sided_read_time_sum/rdma_one_sided_read_time.size()<<" seconds"<<endl;
        cout<<"Avg RDMA one sided write time: "<<rdma_one_sided_write_time_sum/rdma_one_sided_write_time.size()<<" seconds"<<endl;
        cout<<"Client RTT Avg: "<<client_rtt_time_sum/client_rtt_time.size()<<" seconds"<<endl;
    }
    free(ipstr);
    return 0;
}
\ No newline at end of file
#include <iostream>
#include <string>
#include <vector>
#include <unistd.h>
#include "include/common.hpp"
#include "include/client_functions.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using namespace std;
bool debug = false;
int max_packet_size_bytes = 0;
chrono::duration<double> max_cq_poll_timeout(0.0);
Thread_Safe_Queue *job_queue = new Thread_Safe_Queue();
int conn_port = 8888;
const char* t = "Hello from sender!!!";
const char* tt = "!!!rednes morf olleH";
int main() {
Params parameters("config/config.conf");
parameters.read_params();
parameters.print_vals();
debug = parameters.debug;
max_packet_size_bytes = parameters.max_packet_size_bytes;
max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
conn_port = parameters.conn_port;
Connection_Pool conn_pool(parameters.transport_type);
conn_pool.make_connection(string(), parameters.conn_port, parameters);
write_request(conn_pool.rdma_connection_pool.front(), "123@", 4, "hello", 5);
return 0;
}
\ No newline at end of file
#include <chrono>
#include <iostream>
#include <string>
#include <vector>
#include <unistd.h>
#include "include/common.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using namespace std;
// Process-wide settings; main() overwrites them from the parsed config file.
bool debug = false;
int max_packet_size_bytes = 0;
chrono::duration<double> max_cq_poll_timeout(0.0);
// Queue that main() drains directly after the dispatcher has assigned jobs.
Thread_Safe_Queue *job_queue = new Thread_Safe_Queue();
// Peers that main() connects to, in order.
conn_addrs = {
"192.168.200.20",
"192.168.200.30",
"192.168.210.40", //this is the client
"192.168.200.50"
};
// Not referenced by the main() in this file.
string client_addr("192.168.210.40");
int conn_port = 8888;
// Key/value storage placeholders; neither is used by the code in this file.
// Note that a map keyed by char* hashes and compares the pointer values, not
// the strings they point to.
unordered_map<char*, char*> log_map;
vector<pair<char*, char*>> log;
// Intended behaviour (not yet implemented; the body is empty):
// generate num_entries key-value pairs whose keys, when passed through the
// hash function, map to this particular server's id.
//need to generate num_entries pairs of key-value pairs
//these keys should obey some hash functions such that
//when the hash function is applied to these keys the
//hash should lead to this particular server id
void populate_map(size_t key_size, size_t value_size, int num_entries) {
// TODO: implement
}
// Test driver: connects to every address in conn_addrs, then loops forever
// fetching jobs from the dispatcher, assigning them, and printing the
// payload of every job found on the global job queue.
int main() {
    Params parameters("config/config.conf");
    parameters.read_params();
    parameters.print_vals();
    debug = parameters.debug;
    max_packet_size_bytes = parameters.max_packet_size_bytes;
    max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
    conn_port = parameters.conn_port;
    Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
    for(string addr: conn_addrs) {
        dispatcher.add_connection(addr, conn_port, parameters);
        cout<<"Connected to: "<<addr<<":"<<conn_port<<endl;
    }
    while(true) {
        vector<job_context*> jrs = dispatcher.get_jobs();
        cout<<"Got: "<<jrs.size()<<" jobs"<<endl;
        dispatcher.assign_jobs(jrs);
        cout<<"Current Status of Job Queue: "<<job_queue->q.size()<<" jobs, unattended"<<endl;
        // front() on an empty queue is undefined, so report the age only
        // when there is a job to look at.
        if(!job_queue->q.empty()) {
            cout<<"The latest job posted at: "<<(chrono::steady_clock::now() - job_queue->q.front()->job_post_time).count()<<endl;
        }
        // get_job() already removes the job it returns. The extra dequeue()
        // that used to follow discarded every second job and popped an
        // empty queue whenever the job count was odd.
        job_context *j;
        while((j = job_queue->get_job()) != NULL) {
            char* jstart = (char*)j->rdma_transport->get_mr_addr();
            char* kvstart = jstart + sizeof(Common_Request);
            cout<<kvstart<<endl;
            // NOTE(review): `j` is not released here; confirm who owns a job
            // after get_job() before adding a delete.
        }
    }
    return 0;
}
#include <iostream>
#include <string>
#include <vector>
#include "include/common.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using namespace std;
// Peers expected to connect to the NIC; one connection is accepted per entry.
vector<string> conn_addrs = {
    //machine_allocation_ips["ub-05"],
    machine_allocation_ips["ub-04"],
    machine_allocation_ips["ub-08"] //the client
};
string client_addr(machine_allocation_ips["ub-08"]);
int conn_port = 8888;
enum Entity self_id = NIC;

// NIC entry point.
//
// Reads config/nic_config.conf, listens on the common TCP socket and accepts
// one connection per entry of conn_addrs, then alternates between assigning
// incoming jobs and servicing the send queue until max_req requests have
// been counted. With `analyze` enabled, throughput and average timings are
// printed before exit.
int main() {
    auto start_time = chrono::steady_clock::now();
    auto end_time = chrono::steady_clock::now();
    chrono::duration<double> total_time;
    Params parameters("config/nic_config.conf");
    parameters.read_params();
    debug = parameters.debug;
    analyze = parameters.analyze;
    max_packet_size_bytes = parameters.max_packet_size_bytes;
    max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
    conn_port = parameters.conn_port;
    if(debug) {
        parameters.print_vals();
    }
    Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
    dispatcher.setup_common_tcp_conn(conn_port);
    dispatcher.common_socket_start_listen();
    for(string addr: conn_addrs) {
        if(dispatcher.add_conn_on_common_tcp(parameters)) {
            perror("Unable to add connection");
            return 1;
        }
        if(debug) {
            // NOTE(review): `addr` is only the list entry for this iteration;
            // the call above does not take an address, so the peer that
            // actually connected may differ.
            printf("Connected to %s:%d\n", addr.c_str(), conn_port);
            dispatcher.conn_pool->rdma_connection_pool.back()->check_rdma_onesided();
        }
    }
    if(analyze) {
        start_time = chrono::steady_clock::now();
    }
    while(1) {
        dispatcher.assign_jobs(dispatcher.get_jobs());
        dispatcher.service_send_queue();
        // ">=" rather than "==": if the counter ever advances past max_req
        // between two checks, an equality test would never fire.
        if(req_cnt >= max_req) {
            break;
        }
    }
    if(analyze) {
        end_time = chrono::steady_clock::now();
        for(chrono::duration<double> d: request_queue_wait_time) {
            request_queue_wait_time_sum += d.count();
        }
        for(chrono::duration<double> d: response_buffer_wait_time) {
            response_buffer_wait_time_sum += d.count();
        }
        for(chrono::duration<double> d: send_queue_wait_time) {
            send_queue_wait_time_sum += d.count();
        }
        for(chrono::duration<double> d: master_backup_ack_time) {
            master_backup_ack_time_sum += d.count();
        }
        for(chrono::duration<double> d: rdma_one_sided_read_time) {
            rdma_one_sided_read_time_sum += d.count();
        }
        for(chrono::duration<double> d: rdma_one_sided_write_time) {
            rdma_one_sided_write_time_sum += d.count();
        }
        total_time = end_time - start_time;
        cout<<"Throughput: "<<req_cnt/total_time.count()<<" requests/second"<<endl;
        cout<<"Request Queue Wait Time Avg: "<<request_queue_wait_time_sum/request_queue_wait_time.size()<<" seconds"<<endl;
        cout<<"Response Buffer Wait Time Avg: "<<response_buffer_wait_time_sum/response_buffer_wait_time.size()<<" seconds"<<endl;
        cout<<"Send Queue Wait Time Avg: "<<send_queue_wait_time_sum/send_queue_wait_time.size()<<" seconds"<<endl;
        cout<<"Avg Time for Nic to get all acks from servers: "<<master_backup_ack_time_sum/master_backup_ack_time.size()<<" seconds"<<endl;
        cout<<"Avg RDMA one sided read time: "<<rdma_one_sided_read_time_sum/rdma_one_sided_read_time.size()<<" seconds"<<endl;
        cout<<"Avg RDMA one sided write time: "<<rdma_one_sided_write_time_sum/rdma_one_sided_write_time.size()<<" seconds"<<endl;
    }
    return 0;
}
\ No newline at end of file
#include <iostream>
#include <string>
#include <vector>
#include <unistd.h>
#include "include/common.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using namespace std;
bool debug = false;
int max_packet_size_bytes = 0;
chrono::duration<double> max_cq_poll_timeout(0.0);
Thread_Safe_Queue *job_queue = new Thread_Safe_Queue();
vector<string> conn_addrs = {
"192.168.200.20",
//"192.168.200.30",
"192.168.210.40", //this is the client
//"192.168.200.50"
};
string client_addr("192.168.210.40");
int conn_port = 8888;
int main() {
Params parameters("config/config.conf");
parameters.read_params();
parameters.print_vals();
debug = parameters.debug;
max_packet_size_bytes = parameters.max_packet_size_bytes;
max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
conn_port = parameters.conn_port;
Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
dispatcher.add_connection(client_addr, conn_port, parameters);
cout<<"Connected to: "<<client_addr<<":"<<conn_port<<endl;
char* testbuf = (char*) malloc(512);
memset(testbuf, 0, 512);
vector<RDMA_Transport*> fds = dispatcher.conn_pool->get_rdma_request_conns();
TCP_Transport *tcp_transport = fds.front()->get_tcp_conn();
cout<<"Got write from: "<<tcp_transport->get_ip()<<endl;
tcp_transport->recv_data(testbuf, 21);
cout<<"Got: "<<testbuf<<endl;
memset(testbuf, 0, 512);
fds = dispatcher.conn_pool->get_rdma_request_conns();
tcp_transport = fds.front()->get_tcp_conn();
cout<<"Got write from: "<<tcp_transport->get_ip()<<endl;
tcp_transport->recv_data(testbuf, 21);
cout<<"Got: "<<testbuf<<endl;
return 0;
}
\ No newline at end of file
#include <iostream>
#include <string>
#include <vector>
#include <unistd.h>
#include "include/common.hpp"
#include "include/client_functions.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using namespace std;
bool debug = false;
int max_packet_size_bytes = 0;
chrono::duration<double> max_cq_poll_timeout(0.0);
Thread_Safe_Queue *job_queue = new Thread_Safe_Queue();
int conn_port = 8888;
const char* t = "Hello from sender!!!";
const char* tt = "!!!rednes morf olleH";
int main() {
Params parameters("config/config.conf");
parameters.read_params();
parameters.print_vals();
debug = parameters.debug;
max_packet_size_bytes = parameters.max_packet_size_bytes;
max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
conn_port = parameters.conn_port;
Connection_Pool conn_pool(parameters.transport_type);
conn_pool.make_connection(string(), parameters.conn_port, parameters);
TCP_Transport *tcp_transport = conn_pool.rdma_connection_pool[0]->get_tcp_conn();
tcp_transport->send_data(t, 21);
tcp_transport->send_data(tt, 21);
//conn_pool.rdma_connection_pool[0]->copy_to_mr(t);
//conn_pool.rdma_connection_pool[0]->one_sided_write();
//conn_pool.rdma_connection_pool[0]->poll_cq();
//write_rpc(conn_pool.rdma_connection_pool[0], "123", 3, "hello", 5);
return 0;
}
\ No newline at end of file
#include <iostream>
#include <string>
#include <vector>
#include <unordered_map>
#include "include/common.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/hash.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
// using namespace std; // can't use because some namespace ambiguity conflicts in included header files
namespace chrono = std::chrono;
// Addresses this server connects to.
std::vector<std::string> conn_addrs = {
    machine_allocation_ips["ub-04-nic"] //only the one nic for now
};
int conn_port = 8888;
enum Entity self_id = SERVER;

// Server entry point.
//
// Reads config/server_config.conf, connects to every address in conn_addrs,
// then alternates between assigning incoming jobs and servicing the send
// queue until max_req requests have been counted. With `analyze` enabled the
// average timings are printed before exit.
int main() {
    // setup configuration from a config file
    Params parameters("config/server_config.conf");
    parameters.read_params();
    debug = parameters.debug;
    analyze = parameters.analyze;
    max_packet_size_bytes = parameters.max_packet_size_bytes;
    max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
    conn_port = parameters.conn_port;
    if(debug) {
        parameters.print_vals();
    }
    Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
    // setup connections
    if(debug) {
        printf("---Setting up connections---\n");
    }
    for(std::string conn_addr: conn_addrs) {
        // Connect to the address being iterated, not always the first entry.
        dispatcher.add_connection(conn_addr, conn_port, parameters);
        if(debug) {
            // %s needs a C string; passing a std::string object through
            // printf's variadic arguments is undefined behaviour.
            printf("Connected to %s:%d\n", conn_addr.c_str(), conn_port);
            dispatcher.conn_pool->rdma_connection_pool.back()->check_rdma_onesided();
        }
    }
    while(1) {
        dispatcher.assign_jobs(dispatcher.get_jobs());
        dispatcher.service_send_queue();
        // ">=" rather than "==": if the counter ever advances past max_req
        // between two checks, an equality test would never fire.
        if(req_cnt >= max_req) {
            break;
        }
    }
    if(analyze) {
        for(chrono::duration<double> d: request_queue_wait_time) {
            request_queue_wait_time_sum += d.count();
        }
        for(chrono::duration<double> d: send_queue_wait_time) {
            send_queue_wait_time_sum += d.count();
        }
        for(chrono::duration<double> d: worker_write_service_time) {
            worker_write_service_time_sum += d.count();
        }
        for(chrono::duration<double> d: append_log_time) {
            append_log_time_sum += d.count();
        }
        for(chrono::duration<double> d: rdma_one_sided_read_time) {
            rdma_one_sided_read_time_sum += d.count();
        }
        for(chrono::duration<double> d: rdma_one_sided_write_time) {
            rdma_one_sided_write_time_sum += d.count();
        }
        std::cout<<"Request Queue Wait Time Avg: "<<request_queue_wait_time_sum/request_queue_wait_time.size()<<" seconds"<<std::endl;
        std::cout<<"Send Queue Wait Time Avg: "<<send_queue_wait_time_sum/send_queue_wait_time.size()<<" seconds"<<std::endl;
        std::cout<<"Write service time Avg: "<<worker_write_service_time_sum/worker_write_service_time.size()<<" seconds"<<std::endl;
        std::cout<<"Append log time Avg: "<<append_log_time_sum/append_log_time.size()<<" seconds"<<std::endl;
        std::cout<<"Avg RDMA one sided read time: "<<rdma_one_sided_read_time_sum/rdma_one_sided_read_time.size()<<" seconds"<<std::endl;
        std::cout<<"Avg RDMA one sided write time: "<<rdma_one_sided_write_time_sum/rdma_one_sided_write_time.size()<<" seconds"<<std::endl;
    }
    return 0;
}
\ No newline at end of file
#ifndef __METADATA__
#define __METADATA__
#include <stdlib.h>
#include <unordered_map>
// Sizing constants shared through this header.
// `unsigned int` is spelled out because `uint` is a non-standard typedef that
// only some platforms' system headers provide; the type itself is unchanged.
const unsigned int key_len = 128;        // key length
const unsigned int val_len = 256;        // value length
const short int num_status = 33;         // number of status codes
const int object_size = (1 << 16);       // 65536; an earlier value was 128
const int cache_meta_size = 1024;        // cache metadata size
/// Status codes exchanged between the test programs (values are sent as raw
/// integers, so the numeric assignments must not change).
typedef enum Status {
/// Default return value when an operation was successful.
STATUS_OK = 0,
/// Indicates that the server does not know about (and is not responsible
/// for) a given tablet, but that it may exist elsewhere in the system.
/// When it's possible that the tablet exists on another server, this
/// status should be returned (in preference to the definitive
/// TABLE_DOESNT_EXIST).
STATUS_UNKNOWN_TABLET = 1,
/// Indicates that a table does not exist anywhere in the system. At present
/// only the coordinator can say with certainty that a table does not exist.
STATUS_TABLE_DOESNT_EXIST = 2,
/// Indicates that an object does not exist anywhere in the system. Note
/// that unlike with tables there is no UNKNOWN_OBJECT status. This is just
/// because servers will reject operations on objects in unknown tables with
/// a table-related status. If they own a particular tablet, then they can
/// say with certainty if an object exists there or not.
STATUS_OBJECT_DOESNT_EXIST = 3,
STATUS_OBJECT_EXISTS = 4,
STATUS_WRONG_VERSION = 5,
STATUS_NO_TABLE_SPACE = 6,
STATUS_MESSAGE_TOO_SHORT = 7,
STATUS_UNIMPLEMENTED_REQUEST = 8,
STATUS_REQUEST_FORMAT_ERROR = 9,
STATUS_RESPONSE_FORMAT_ERROR = 10,
STATUS_COULDNT_CONNECT = 11,
STATUS_BACKUP_BAD_SEGMENT_ID = 12,
/// Returned by backups when they cannot (or do not wish to) allocate
/// space for a segment replica.
STATUS_BACKUP_OPEN_REJECTED = 13,
STATUS_BACKUP_SEGMENT_OVERFLOW = 14,
STATUS_BACKUP_MALFORMED_SEGMENT = 15,
STATUS_SEGMENT_RECOVERY_FAILED = 16,
/// Indicates that a server is not prepared to handle a request at
/// the present time; the caller should retry at a later time. This
/// status can be returned under many different situations, such as
/// (a) the server is out of resources to execute the request, or
/// (b) the server is not sure it actually has authority to execute
/// the request, and is checking with the coordinator.
STATUS_RETRY = 17,
/// Indicates that the RPC requested an unknown service.
STATUS_SERVICE_NOT_AVAILABLE = 18,
STATUS_TIMEOUT = 19,
/// Indicates that server to which an RPC is directed either never existed,
/// has come and gone, or is currently in crashed state. The server is not
/// in a position to respond to RPCs and probably never will be again
/// (unless the id hasn't yet existed; once a server crashes its id will
/// never be reused).
STATUS_SERVER_NOT_UP = 20,
STATUS_INTERNAL_ERROR = 21,
/// Indicates that the object chosen for an operation does not match the
/// associated requirements. Therefore the chosen object is invalid.
STATUS_INVALID_OBJECT = 22,
/// Indicates that a tablet does not exist. This status is of relevance
/// when split or merge operations on tablets are executed.
STATUS_TABLET_DOESNT_EXIST = 23,
/// Indicates that the logic to partition tablets was invoked without a
/// preceding invocation to start reading replicas off of disk.
STATUS_PARTITION_BEFORE_READ = 24,
/// Indicates that an RPC was intended for a particular server id, but
/// was actually sent to a different server id.
STATUS_WRONG_SERVER = 25,
/// Indicates that the server sending an RPC is not present in the
/// server list of the RPC recipient. Used to help servers discover
/// that they are zombies (the rest of the cluster thinks a zombie
/// is dead, but the zombie thinks it's still alive), so they don't
/// continue servicing requests when other servers have already
/// taken over their tablets. See "Zombies" in designNotes.
STATUS_CALLER_NOT_IN_CLUSTER = 26,
/// Indicates that a single request was too big to fit in an rpc and
/// thus could not be sent/carried out.
STATUS_REQUEST_TOO_LARGE = 27,
/// Indicates that the server does not know about (and is not responsible
/// for) a given indexlet, but that it may exist elsewhere in the system.
/// When it's possible that the indexlet exists on another server, this
/// status should be returned (in preference to the definitive
/// INDEX_DOESNT_EXIST).
STATUS_UNKNOWN_INDEXLET = 28,
/// Indicates that an index does not exist anywhere in the system. At
/// present only the coordinator can say with certainty that an index does
/// not exist.
STATUS_INDEX_DOESNT_EXIST = 29,
/// Indicates that a parameter provided by the client is invalid (for
/// example: it is outside allowed bounds).
STATUS_INVALID_PARAMETER = 30,
/// Indicates that client already received the result of the rpc.
/// It does not make sense to execute the RPC again. Most likely cause
/// is a delayed network packet.
STATUS_STALE_RPC = 31,
/// Indicates that the lease of a client is expired on the coordinator.
/// Master refused to execute the RPC with expired lease.
STATUS_EXPIRED_LEASE = 32,
/// Indicates that a client tried to perform transaction operations after
/// the transaction commit had already started.
STATUS_TX_OP_AFTER_COMMIT = 33,
/// Alias for the largest defined status value (same value as STATUS_TX_OP_AFTER_COMMIT).
STATUS_MAX_VALUE = 33,
// Note: if you add a new status value you must make the following
// additional updates:
// * Modify STATUS_MAX_VALUE to have a value equal to the largest
// defined status value, and make sure its definition is the last one
// in the list. STATUS_MAX_VALUE is used primarily for testing.
// * Add new entries in the tables "messages" and "symbols" in Status.cc.
// * Add a new exception class to ClientException.h
// * Add a new "case" to ClientException::throwException to map from
// the status value to a status-specific ClientException subclass.
// * In the Java bindings, add a static class for the exception to
// ClientException.java
// * Add a case for the status of the exception to throw the exception in
// ClientException.java
// * Add the exception to the Status enum in Status.java, making
// sure the status is in the correct position corresponding to its status
// code.
// NOTE(review): the files named above (Status.cc, ClientException.*) are not
// part of the sources shown here; this checklist may be inherited text.
} Status;
// Packed record of a version number plus four one-byte flags.
// The packed attribute removes padding, so the struct occupies 12 bytes.
// NOTE(review): flag names suggest conditional-operation reject rules; no code in view reads them.
struct RejectRules {
uint64_t givenVersion;
uint8_t doesntExist;
uint8_t exists;
uint8_t versionLeGiven;
uint8_t versionNeGiven;
} __attribute__((packed));
// Per-key bookkeeping entry stored in `obj_table`.
struct object_metadata {
long long key;      // key this entry describes (set to the table index by populate_cache_meta)
bool valid;         // requests for keys whose entry is not valid are answered with an error status
uint64_t version;
char *value_ptr;    // NOTE(review): never assigned in the code shown; ownership unknown
};
//NIC mem
extern std::unordered_map<long long int, struct object_metadata> obj_table;
// Fixed-size message exchanged between the test programs. `ph` pads the union
// to object_size bytes; `obj` overlays the actual fields at the start of it.
// The union is copied/sent as raw bytes, so field order and types are part of the wire format.
union object {
char ph[object_size];
struct obj {
Status status;
long long key;
char value[4];
int version;
unsigned long send_ts;   // the client stores a millisecond timestamp here before sending
} obj;
};
#endif
#include <iostream>
#include <sys/time.h>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
// Configuration for the socket-based test client.
// MODE and err_fraction are not referenced by the main() of this program.
const short int MODE = 0;
// Address handed to sock_connect() through resource_base::server_name.
char *SERVER_HOST = "192.168.200.20";
//char *SERVER_NIC = "192.168.200.21";
//const int NIC_PORT = 8090;
// TCP port used for the connection to SERVER_HOST.
const int SERVER_PORT = 8090;
const double err_fraction = 0.5;
int main(int argc, char * argv[]) {
char *temp = (char *) malloc(128);
struct resource_base *base;
base = (struct resource_base*) malloc(sizeof(struct resource_base));
init_resources(base);
base->server_name = SERVER_HOST;
base->ib_port = IB_PORT;
base->gid_idx = GID_IDX;
base->port = SERVER_PORT;
base->mr_buf_addr = (char *) malloc(base->mr_size);
strcpy(base->mr_buf_addr, "Hi from client\0");
sock_connect(base);
sync_remote_qp(base, base->mr_buf_addr, temp, 15);
union object test_obj;
union object ret_obj;
memset(&test_obj, 0, sizeof(test_obj));
memset(&ret_obj, 0, sizeof(ret_obj));
test_obj.obj.key = 1;
test_obj.obj.value[0] = 'T';
test_obj.obj.version = 1;
test_obj.obj.status = STATUS_OK;
struct timeval temp_time;
double snd_ts, rcvd_ts;
double avg = 0;
int err_cnt = 0;
int succ_cnt = 0;
enum Status ret_status;
for(int i=0; i<cache_meta_size; i++) {
test_obj.obj.key = i;
gettimeofday(&temp_time, NULL);
snd_ts = ((double)temp_time.tv_sec*1000.0) + ((double)temp_time.tv_usec/1000.0);
test_obj.obj.send_ts = snd_ts;
send_and_check(base, (char *)&test_obj, (char *)&ret_status, sizeof(test_obj), sizeof(ret_status));
gettimeofday(&temp_time, NULL);
rcvd_ts = ((double)temp_time.tv_sec*1000.0) + ((double)temp_time.tv_usec/1000.0);
avg += (rcvd_ts - snd_ts);
if(ret_status == STATUS_WRONG_VERSION) err_cnt++;
else succ_cnt++;
}
std::cout<<"Errored requests: "<<err_cnt<<std::endl;
std::cout<<"Successful requests: "<<succ_cnt<<std::endl;
std::cout<<"Avg. RTT with 1 other replica: "<<avg/(double)cache_meta_size<<" ms"<<std::endl;
cleanup(base);
return 0;
}
\ No newline at end of file
#include <iostream>
#include <string>
#include <vector>
#include "include/common.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using namespace std;
// Peers this process expects; main() performs one add_conn_on_common_tcp() per entry.
vector<string> conn_addrs = {
machine_allocation_ips["ub-05"],
machine_allocation_ips["ub-08"], //the client
};
string client_addr(machine_allocation_ips["ub-08"]);
// Default port; main() overwrites it with the value read from the config file.
int conn_port = 8888;
enum Entity self_id = SERVER;
int main() {
Params parameters("config/master_server_config.conf");
parameters.read_params();
debug = parameters.debug;
analyze = parameters.analyze;
max_packet_size_bytes = parameters.max_packet_size_bytes;
max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
conn_port = parameters.conn_port;
if(debug) {
parameters.print_vals();
}
Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
dispatcher.setup_common_tcp_conn(conn_port);
dispatcher.common_socket_start_listen();
for(string addr: conn_addrs) {
if(dispatcher.add_conn_on_common_tcp(parameters)) {
perror("Unable to add connection");
return 1;
}
if(debug) {
printf("Connected to %s:%d\n", addr.c_str(), conn_port);
}
if(debug) {
dispatcher.conn_pool->rdma_connection_pool.back()->check_rdma_onesided();
}
}
while(1) {
dispatcher.assign_jobs(dispatcher.get_jobs());
if(debug) {
printf("Current job queue size: %d\n", (int)job_queue->queue_size);
}
dispatcher.service_send_queue();
if(req_cnt == 100) {
break;
}
}
if(analyze) {
for(chrono::duration<double> d: request_queue_wait_time) {
request_queue_wait_time_sum += d.count();
}
for(chrono::duration<double> d: response_buffer_wait_time) {
response_buffer_wait_time_sum += d.count();
}
for(chrono::duration<double> d: send_queue_wait_time) {
send_queue_wait_time_sum += d.count();
}
for(chrono::duration<double> d: master_backup_ack_time) {
master_backup_ack_time_sum += d.count();
}
for(chrono::duration<double> d: rdma_one_sided_read_time) {
rdma_one_sided_read_time_sum += d.count();
}
for(chrono::duration<double> d: rdma_one_sided_write_time) {
rdma_one_sided_write_time_sum += d.count();
}
cout<<"Request Queue Wait Time Avg: "<<request_queue_wait_time_sum/request_queue_wait_time.size()<<" seconds"<<endl;
cout<<"Response Buffer Wait Time Avg: "<<response_buffer_wait_time_sum/response_buffer_wait_time.size()<<" seconds"<<endl;
cout<<"Send Queue Wait Time Avg: "<<send_queue_wait_time_sum/send_queue_wait_time.size()<<" seconds"<<endl;
cout<<"Avg Time for Master Server to get all acks from backup servers: "<<master_backup_ack_time_sum/master_backup_ack_time.size()<<" seconds"<<endl;
cout<<"Avg RDMA one sided read time: "<<rdma_one_sided_read_time_sum/rdma_one_sided_read_time.size()<<" seconds"<<endl;
cout<<"Avg RDMA one sided write time: "<<rdma_one_sided_write_time_sum/rdma_one_sided_write_time.size()<<" seconds"<<endl;
}
return 0;
}
\ No newline at end of file
#include <iostream>
#include <sys/time.h>
#include <unordered_map>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
// Configuration and shared state for the NIC-side replication test.
// Number of storage servers the NIC connects to (length of SERVER_HOST).
const int num_servers = 1;
const short int MODE = 2;
char *SERVER_HOST[num_servers] = {
//"192.168.200.20",
"192.168.200.50"
};
//char *SERVER_NIC = "192.168.200.21";
// Port used for the connections to the servers.
const int SERVER_PORT = 8080;
// Port on which the NIC accepts the client connection.
const int CLIENT_PORT = 8090;
// Index into the verbs device list passed to open_dev().
const short int dev_num = 0;
// Probability with which populate_cache_meta() marks a key invalid.
const double err_fraction = 0.5;
// Zero-initialised template entries copied into obj_table.
struct object_metadata obj_metas[cache_meta_size];
std::unordered_map<long long int, struct object_metadata> obj_table;
void populate_cache_meta(std::unordered_map<long long, struct object_metadata>& m) {
for(int i=0; i<cache_meta_size; i++) {
m[i] = obj_metas[i];
m[i].key = i;
if((double)rand()/RAND_MAX <= err_fraction) m[i].valid = false;
else m[i].valid = true;
}
}
/*
 * NIC-side replication test.
 *
 * 1. Connects a queue pair to every host in SERVER_HOST.
 * 2. Accepts the client's TCP connection and exchanges a greeting.
 * 3. For cache_meta_size requests: reads an object from the client; if its
 *    key is marked valid in obj_table, forwards it to every server and waits
 *    for their acks, otherwise answers STATUS_WRONG_VERSION immediately.
 *
 * The client blocks for exactly one Status reply per request, so every path
 * through the loop body sends one (the original sent nothing when a server
 * failed to ack, which left the client waiting forever).
 */
int main(int argc, char * argv[]) {
    srand(time(NULL));
    char *temp = (char *) malloc(128);

    //nic connects to servers
    struct resource_base *server_base[num_servers];
    struct ibv_port_attr server_port_attr[num_servers];
    union ibv_gid server_gid[num_servers];
    for (int i = 0; i < num_servers; i++) {
        server_base[i] = (struct resource_base*) malloc(sizeof(struct resource_base));
        init_resources(server_base[i]);
        server_base[i]->mode = 1;
        server_base[i]->ib_port = 1;
        server_base[i]->gid_idx = 1;
        server_base[i]->server_name = SERVER_HOST[i];
        server_base[i]->port = SERVER_PORT;
        open_dev(server_base[i], dev_num);
        allocate_pd(server_base[i]);
        register_mr(server_base[i]);
        init_cq(server_base[i]);
        init_qp(server_base[i]);
        if(ibv_query_port(server_base[i]->ctx, server_base[i]->ib_port, &server_port_attr[i]))
            D(err_msg("ibv_query_port", true, server_base[i]));
        if(server_port_attr[i].state != IBV_PORT_ACTIVE)
            D(err_msg("IB PORT NOT ACTIVE", true, server_base[i]));
        server_base[i]->port_attr = &server_port_attr[i];
        set_mtu(server_base[i]->port_attr, &(server_base[i]->mtu));
        if(ibv_query_gid(server_base[i]->ctx, server_base[i]->ib_port, server_base[i]->gid_idx, &server_gid[i]))
            D(err_msg("ibv_query_gid", true, server_base[i]));
        memcpy(server_base[i]->local_conn->gid, &server_gid[i], 16);
        connect_qp(server_base[i]);
        sync_remote_qp(server_base[i], "T", temp, 1);
        if(poll_completion(server_base[i]))
            D(err_msg("IBV_WR_SEND", false, server_base[i]));
    }

    //nic accepts the client connection (server_name stays NULL, so sock_connect listens)
    struct resource_base *client_base;
    client_base = (struct resource_base*) malloc(sizeof(struct resource_base));
    init_resources(client_base);
    client_base->mode = 1;
    client_base->ib_port = 1;
    client_base->gid_idx = 1;
    client_base->port = CLIENT_PORT;
    client_base->mr_buf_addr = (char *) malloc(client_base->mr_size);
    sock_connect(client_base);
    sync_remote_qp(client_base, "Hi from client\0", client_base->mr_buf_addr, 15);

    int err_cnt = 0;
    int succ_cnt = 0;
    populate_cache_meta(obj_table);
    obj_table[0].valid = false;   // key 0 always exercises the error path
    union object sent_obj;
    enum Status send_succ = STATUS_OK;
    enum Status send_err = STATUS_WRONG_VERSION;
    struct timeval temp_time;
    double s_time, e_time;
    double avg_time_diff=0.0;
    int ack_cnt;
    for(int i=0; i<cache_meta_size; i++) {
        memset(&sent_obj, 0, sizeof(sent_obj));
        read_obj(client_base, (char *)&sent_obj, sizeof(sent_obj));
        if(!obj_table[sent_obj.obj.key].valid) {
            send_obj(client_base, (char *)&send_err, sizeof(send_err));
            err_cnt++;
            continue;
        }
        // Stage the object in every server's registered buffer.
        for(resource_base *server: server_base)
            memcpy((void*)server->mr_buf_addr, (void*)&sent_obj, sizeof(sent_obj));
        gettimeofday(&temp_time, NULL);
        s_time = ((double)temp_time.tv_sec*1000 + (double)temp_time.tv_usec/1000);
        ack_cnt = 0;
        // Announce the write to all servers first, then collect the acks.
        for(resource_base *server: server_base) {
            sync_remote_qp(server, "W", temp, 1);
        }
        for(resource_base *server: server_base) {
            sync_remote_qp(server, "T", temp, 1); //ack
            if(temp[0]=='S') ack_cnt++;
            temp[0] = 'T';
        }
        if(ack_cnt==num_servers) {
            gettimeofday(&temp_time, NULL);
            e_time = ((double)temp_time.tv_sec*1000 + (double)temp_time.tv_usec/1000);
            avg_time_diff += (e_time - s_time);
            succ_cnt++;
            send_obj(client_base, (char *)&send_succ, sizeof(send_succ));
        }
        else {
            // Replication incomplete: still answer, otherwise the client
            // never returns from its blocking wait for this request.
            send_obj(client_base, (char *)&send_err, sizeof(send_err));
            err_cnt++;
        }
    }
    // Avoid 0/0 when no request succeeded.
    if(succ_cnt > 0)
        avg_time_diff /= (double)succ_cnt;
    std::cout<<"Errored requests: "<<err_cnt<<std::endl;
    std::cout<<"Successful requests: "<<succ_cnt<<std::endl;
    std::cout<<"Avg. replication time for storage server with 1 replica : "<<avg_time_diff<<std::endl;
    // Tell every server to stop, then release all resources.
    for(resource_base *server: server_base)
        sync_remote_qp(server, "X", temp, 1);
    for(resource_base *server: server_base)
        cleanup(server);
    cleanup(client_base);
    free(temp);
    return 0;
}
\ No newline at end of file
#include <iostream>
#include <unistd.h>
#include <sys/time.h>
#include <string.h>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "transport-helper.hpp"
#include "metadata.hpp"
// Configuration for the RDMA test client.
// Stored into resource_base::mode by main().
const short int MODE = 2;
// NOTE(review): not referenced by the main() of this program (server_name is left NULL there).
char *SERVER_HOST = "192.168.200.20";
const int SERVER_PORT = 8888;
// Index into the verbs device list passed to open_dev().
const short int dev_num = 0;
int main(int argc, char *argv[]) {
char *temp = (char *) malloc(128);
struct resource_base *base;
struct timeval t_time;
double start_ts, end_ts;
double avg_time;
base = (struct resource_base *) malloc(sizeof(struct resource_base));
init_resources(base);
base->mode = MODE;
base->ib_port = IB_PORT;
base->gid_idx = GID_IDX;
base->port = SERVER_PORT;
open_dev(base, dev_num);
allocate_pd(base);
register_mr(base);
init_cq(base);
init_qp(base);
struct ibv_port_attr port_attr;
if(ibv_query_port(base->ctx, base->ib_port, &port_attr))
D(err_msg("ibv_query_port", true, base));
if(port_attr.state != IBV_PORT_ACTIVE)
D(err_msg("IB PORT INACTIVE", true, base));
base->port_attr = &port_attr;
set_mtu(base->port_attr, &(base->mtu));
union ibv_gid my_gid;
if(ibv_query_gid(base->ctx, base->ib_port, base->gid_idx, &my_gid))
D(err_msg("IBV_QUERY_GID", true, base));
memcpy(base->local_conn->gid, &my_gid, 16);
connect_qp(base);
if(poll_completion(base))
D(err_msg("poll_completion", false, base));
union object obj_to_send;
int err_cnt = 0;
int succ_cnt = 0;
avg_time = 0;
enum Status ret_status;
for(int i=0; i<cache_meta_size; i++) {
obj_to_send.obj.key = i;
memcpy(base->mr_buf_addr, obj_to_send, sizeof(obj_to_send));
gettimeofday(&t_time, NULL);
start_ts = ((double)(t_time.tv_sec)*(double)(1000.0) + (double)t_time.tv_usec/(double)(1000));
send_obj(base, "R", 1);
read_obj(base, temp, 1);
if(temp=="R") {
post_send(base, IBV_WR_RDMA_READ);
if(poll_completion(base))
D(err_msg("poll_completion", true, base));
if(*(enum Status *)(base->mr_buf_addr) == STATUS_OK)
succ_cnt++;
else
err_cnt++;
gettimeofday(&t_time, NULL);
end_ts = ((double)(t_time.tv_usec)/(double)(1000)
+ (double)(t_time.tv_sec)*(double)(1000));
avg_time += (start_ts - end_ts);
}
}
avg_time /= (double)cache_meta_size;
std::cout<<"Err count: "<<err_cnt<<std::endl;
std::cout<<"Succ count: "<<succ_cnt<<std::endl;
std::cout<<"Avg RTT: "<<avg_time<<" ms"<<std::endl;
cleanup(base);
}
\ No newline at end of file
#include <iostream>
#include <sys/time.h>
#include <unordered_map>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
// Configuration and shared state for the primary-server replication test.
// Number of replica servers the primary connects to (length of SERVER_HOST).
const int num_replicas = 1;
char* SERVER_HOST[num_replicas] = {
"192.168.200.50"
};
// Port on which the primary accepts the client connection.
const int CLIENT_PORT = 8888;
// Port used for the connections to the replicas.
const int SERVER_PORT = 9999;
// Index into the verbs device list passed to open_dev().
const short int dev_num = 0;
// Probability with which populate_cache_meta() marks a key invalid.
// NOTE(review): with 0 a key is invalid only when rand() returns exactly 0.
const double err_fraction = 0;
// Zero-initialised template entries copied into obj_table.
struct object_metadata obj_metas[cache_meta_size];
std::unordered_map<long long int, struct object_metadata> obj_table;
void populate_cache_meta(std::unordered_map<long long, struct object_metadata>& m) {
for(int i=0; i<cache_meta_size; i++) {
m[i] = obj_metas[i];
m[i].key = i;
if((double)rand()/RAND_MAX <= err_fraction) m[i].valid = false;
else m[i].valid = true;
}
}
/*
 * Primary-server replication test.
 *
 * 1. Connects a queue pair to every replica in SERVER_HOST.
 * 2. Sets up a second queue pair towards the client.
 * 3. For cache_meta_size iterations: waits for the client's one-byte "R"
 *    signal, RDMA-reads the client's buffer, and if the object's key is valid
 *    in obj_table replicates the buffer to all replicas and waits for acks.
 *    The resulting Status is placed at the start of the client buffer and the
 *    client is signalled with "R".
 *
 * Every handled request now produces a reply (the original sent nothing when
 * a replica failed to ack, leaving the client blocked), and the average is
 * only computed when at least one request succeeded.
 */
int main(int argc, char* argv[]) {
    srand(time(NULL));
    char *temp = (char *) malloc(128);
    struct timeval t_time;
    double start_ts, end_ts, avg_time;
    struct resource_base* server_base[num_replicas];
    struct resource_base* client_base;
    struct ibv_port_attr server_port_attr[num_replicas];
    struct ibv_port_attr client_port_attr;
    union ibv_gid server_gid[num_replicas];
    union ibv_gid client_gid;

    //connecting to other server(s)
    for (int i = 0; i < num_replicas; i++) {
        server_base[i] = (struct resource_base*) malloc(sizeof(struct resource_base));
        init_resources(server_base[i]);
        server_base[i]->mode = 1;
        server_base[i]->ib_port = 1;
        server_base[i]->gid_idx = 1;
        server_base[i]->server_name = SERVER_HOST[i];
        server_base[i]->port = SERVER_PORT;
        open_dev(server_base[i], dev_num);
        allocate_pd(server_base[i]);
        register_mr(server_base[i]);
        init_cq(server_base[i]);
        init_qp(server_base[i]);
        if(ibv_query_port(server_base[i]->ctx, server_base[i]->ib_port, &server_port_attr[i]))
            D(err_msg("ibv_query_port", true, server_base[i]));
        if(server_port_attr[i].state != IBV_PORT_ACTIVE)
            D(err_msg("IB PORT NOT ACTIVE", true, server_base[i]));
        server_base[i]->port_attr = &server_port_attr[i];
        set_mtu(server_base[i]->port_attr, &(server_base[i]->mtu));
        if(ibv_query_gid(server_base[i]->ctx, server_base[i]->ib_port, server_base[i]->gid_idx, &server_gid[i]))
            D(err_msg("ibv_query_gid", true, server_base[i]));
        memcpy(server_base[i]->local_conn->gid, &server_gid[i], 16);
        connect_qp(server_base[i]);
        sync_remote_qp(server_base[i], "T", temp, 1);
        if(poll_completion(server_base[i]))
            D(err_msg("IBV_WR_SEND", false, server_base[i]));
    }

    //primary connects to client
    client_base = (struct resource_base*) malloc(sizeof(struct resource_base));
    init_resources(client_base);
    client_base->ib_port = IB_PORT;
    client_base->gid_idx = GID_IDX;
    client_base->port = CLIENT_PORT;
    open_dev(client_base, dev_num);
    allocate_pd(client_base);
    register_mr(client_base);
    init_cq(client_base);
    init_qp(client_base);
    if(ibv_query_port(client_base->ctx, client_base->ib_port, &(client_port_attr)))
        D(err_msg("CLIENT PORT ATTR", true, client_base));
    if(client_port_attr.state != IBV_PORT_ACTIVE)
        D(err_msg("CLIENT PORT NOT ACTIVE", true, client_base));
    client_base->port_attr = &(client_port_attr);
    if(ibv_query_gid(client_base->ctx, client_base->ib_port, client_base->gid_idx, &(client_gid)))
        D(err_msg("CLIENT QUERY GID", true, client_base));
    memcpy(client_base->local_conn->gid, &(client_gid), 16);
    connect_qp(client_base);
    if(poll_completion(client_base))
        D(err_msg("client poll completion", true, client_base));

    int err_cnt = 0;
    int succ_cnt = 0;
    int ack_cnt = 0;
    populate_cache_meta(obj_table);
    enum Status send_succ = STATUS_OK;
    enum Status send_err = STATUS_WRONG_VERSION;
    avg_time = 0;

    // Publish `status` at the start of the client buffer and signal the client.
    auto reply_to_client = [&](enum Status status) {
        memcpy((void*)client_base->mr_buf_addr, (void*)(&status), sizeof(status));
        send_obj(client_base, "R", 1);
    };

    for(int i=0; i<cache_meta_size; i++) {
        gettimeofday(&t_time, NULL);
        start_ts = ((double)t_time.tv_sec*(1000.0)
                    + (double)t_time.tv_usec/(1000.0));
        read_obj(client_base, temp, 1);
        if(temp[0]!='R')
            continue;
        post_send(client_base, IBV_WR_RDMA_READ);
        if(poll_completion(client_base))
            D(err_msg("Client obj read poll completion", true, client_base));
        if(!obj_table[((union object*)client_base->mr_buf_addr)->obj.key].valid) {
            reply_to_client(send_err);
            err_cnt++;
            continue;
        }
        // Stage the client's buffer in every replica's registered buffer.
        for(struct resource_base* server: server_base)
            memcpy((void*) server->mr_buf_addr,
                   (void*)(client_base->mr_buf_addr),
                   client_base->mr_size);
        ack_cnt = 0;
        for(struct resource_base *server: server_base)
            sync_remote_qp(server, "W", temp, 1);
        for(resource_base *server: server_base) {
            sync_remote_qp(server, "T", temp, 1); //ack
            if(temp[0]=='S') ack_cnt++;
            temp[0] = 'T';
        }
        if(ack_cnt==num_replicas) {
            gettimeofday(&t_time, NULL);
            end_ts = ((double)t_time.tv_sec*1000.0
                      + (double)t_time.tv_usec/1000.0);
            avg_time += (end_ts - start_ts);
            succ_cnt++;
            reply_to_client(send_succ);
        }
        else {
            // Replication incomplete: still answer so the client does not
            // block forever waiting for this request's reply.
            reply_to_client(send_err);
            err_cnt++;
        }
    }
    // Avoid 0/0 when no request succeeded.
    if(succ_cnt > 0)
        avg_time /= (double)succ_cnt;
    std::cout<<"Errored requests: "<<err_cnt<<std::endl;
    std::cout<<"Successful requests: "<<succ_cnt<<std::endl;
    std::cout<<"Avg. replication time for storage server with 1 replica : "<<avg_time<<std::endl;
    // Tell every replica to stop, then release all resources.
    for(resource_base *server: server_base)
        sync_remote_qp(server, "X", temp, 1);
    for(resource_base *server: server_base)
        cleanup(server);
    cleanup(client_base);
    free(temp);
    return 0;
}
\ No newline at end of file
#include <iostream>
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string>
#include <infiniband/verbs.h>
#include "rdma_helper.hpp"
#include "rdma_states.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
#define PLACEHOLDER "hello\0"
/*
 * Report a failure on stderr and optionally terminate the process.
 *
 * msg     - prefix passed to perror(), which appends the text for errno.
 * to_exit - when true, release `base` through cleanup() and exit.
 * base    - resources to release before exiting; may be NULL.
 *
 * A fatal error exits with EXIT_FAILURE (the original exited with 0, which
 * made failed runs look successful to the shell and to scripts).
 */
void err_msg(std::string msg, bool to_exit, struct resource_base *base) {
    perror(msg.c_str());
    if(to_exit) {
        if(base != NULL)
            cleanup(base);
        exit(EXIT_FAILURE);
    }
}
/*
 * Release everything `base` owns: queue pair, memory region and its buffer,
 * completion queue, protection domain, device context and both sockets.
 *
 * Each handle is reset after it is released, so calling cleanup() a second
 * time on the same `base` is a no-op instead of a double free/destroy (some
 * error paths reach cleanup() twice). A NULL `base` is ignored.
 *
 * The structs malloc'ed by init_resources() are intentionally left alone:
 * callers repoint port_attr at stack storage, so it cannot be freed here.
 */
void cleanup(struct resource_base *base) {
    if(base == NULL)
        return;
    if(base->qp) {
        ibv_destroy_qp(base->qp);
        base->qp = NULL;
    }
    if(base->mr) {
        ibv_dereg_mr(base->mr);
        base->mr = NULL;
    }
    // The buffer is freed only after its memory region has been deregistered.
    if(base->mr_buf_addr) {
        free(base->mr_buf_addr);
        base->mr_buf_addr = NULL;
    }
    if(base->cq) {
        ibv_destroy_cq(base->cq);
        base->cq = NULL;
    }
    if(base->pd) {
        ibv_dealloc_pd(base->pd);
        base->pd = NULL;
    }
    if(base->ctx) {
        ibv_close_device(base->ctx);
        base->ctx = NULL;
    }
    if(base->local_sock_fd >= 0) {
        close(base->local_sock_fd);
        base->local_sock_fd = -1;
    }
    if(base->conn_fd >= 0) {
        close(base->conn_fd);
        base->conn_fd = -1;
    }
}
/*
 * Put `base` into its default, unconnected state and allocate the auxiliary
 * structs it points to.
 *
 * The auxiliary structs are zero-initialised (calloc instead of malloc):
 * local_conn is later written to the peer as raw bytes, including its gid and
 * padding, so it must not carry uninitialised heap contents.
 * An allocation failure leaves the corresponding pointer NULL.
 */
void init_resources(struct resource_base *base) {
    // Verbs handles: nothing opened yet.
    base->ctx = NULL;
    base->pd = NULL;
    base->cq = NULL;
    base->qp = NULL;
    base->mr = NULL;
    base->mr_buf_addr = NULL;
    base->mr_size = ((1<<20)+200); // previously 128

    // Queue-pair tuning defaults.
    base->mtu = IBV_MTU_512;
    base->min_rnr_timer = 12;
    base->timeout = 12;
    base->retry_cnt = 4;
    base->ib_port = 1;
    base->gid_idx = 1;

    // Socket state: -1 marks "not open" for cleanup().
    base->local_sock_fd = -1;
    base->conn_fd = -1;
    base->mode = -1;
    base->server_name = NULL;
    base->port = -1;

    base->local_conn = (struct conn_data *) calloc(1, sizeof(struct conn_data));
    base->remote_conn = (struct conn_data *) calloc(1, sizeof(struct conn_data));
    base->dev_attr = (struct ibv_device_attr *) calloc(1, sizeof(struct ibv_device_attr));
    base->port_attr = (struct ibv_port_attr *) calloc(1, sizeof(struct ibv_port_attr));
}
/*
 * Report whether the port state cached in base->port_attr is IBV_PORT_ACTIVE.
 * Terminates through err_msg() when no device context has been opened.
 *
 * Note: this does not call ibv_query_port itself; per the original author it
 * did not work from inside a helper, so the callers query the port in main()
 * and store the result in base->port_attr.
 */
bool query_port(struct resource_base *base) {
    if(base->ctx == NULL) {
        err_msg("query_port;No device context", true, base);
    }
    const bool port_is_active = (base->port_attr->state == IBV_PORT_ACTIVE);
    return port_is_active;
}
/*
 * Establish the TCP side channel for `base`.
 *
 * With base->server_name == NULL this side listens on base->port and accepts
 * one connection (conn_fd = accepted socket, local_sock_fd = listener).
 * Otherwise it connects to server_name:port (conn_fd = connected socket).
 *
 * Returns 0 on success and -1 when the socket cannot be created. Any other
 * failure (invalid port, bind, listen, accept, bad address, connect) is fatal
 * and goes through err_msg(), which already runs cleanup() - the original
 * also called cleanup() itself on the invalid-port path, releasing everything twice.
 */
int sock_connect(struct resource_base *base) {
    if(base->port < 0)
        err_msg("tcp_connect", true, base);

    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(base->port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    int sfd = socket(AF_INET, SOCK_STREAM, 0);
    if(sfd < 0) {
        err_msg("sock_connect; sfd:socket", false, base);
        return -1;
    }

    if(base->server_name == NULL) {
        // Passive side: wait for exactly one peer.
        if(bind(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr))) {
            close(sfd);
            err_msg("sock_connect;bind", true, base);
        }
        if(listen(sfd, 1)) {
            close(sfd);
            err_msg("sock_connect;listen", true, base);
        }
        int cfd = accept(sfd, NULL, NULL);
        if(cfd < 0) {
            close(sfd);
            err_msg("sock_connect;accept", true, base);
        }
        base->conn_fd = cfd;
        base->local_sock_fd = sfd;
        return 0;
    }

    // Active side: server_name must be a dotted-quad IPv4 address.
    if(inet_aton(base->server_name, &host_addr.sin_addr) == 0) {
        close(sfd);
        err_msg("sock_connect;inet_aton", true, base);
    }
    if(connect(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        close(sfd);
        err_msg("sock_connect;connect", true, base);
    }
    base->conn_fd = sfd;
    return 0;
}
/*
 * Exchange `size` bytes with the peer over the TCP side channel: write
 * `local_data`, then read the peer's `size` bytes into `remote_data`.
 *
 * Returns the number of bytes actually read (less than `size` if the peer
 * closed the connection or read() failed). A short write is fatal.
 *
 * The read loop appends at the current offset and asks only for the bytes
 * still missing. The original re-read `size` bytes at offset 0 on every pass,
 * so a partial read was overwritten and bytes of the next message could be
 * consumed.
 */
int sync_remote_qp (struct resource_base *base, char *local_data, char *remote_data, int size) {
    int operation_bytes = write(base->conn_fd, local_data, size);
    if(operation_bytes < size) {
        err_msg("sync_remote_qp;write", true, base);
    }

    int total_read_bytes = 0;
    while(total_read_bytes < size) {
        operation_bytes = read(base->conn_fd,
                               remote_data + total_read_bytes,
                               size - total_read_bytes);
        if(operation_bytes <= 0)
            break;   // 0: peer closed the connection, <0: read error
        total_read_bytes += operation_bytes;
    }
    return total_read_bytes;
}
/*
 * Open the RDMA device at index `dev_num` of the verbs device list and store
 * its context in base->ctx. Always returns 0; every failure is fatal via
 * err_msg().
 *
 * `dev_num` is validated against the number of devices reported, so a host
 * with no (or fewer) devices fails with a message instead of indexing past
 * the end of the list.
 */
int open_dev(struct resource_base *base, short int dev_num) {
    int num_devs = 0;
    struct ibv_device **dev_list = ibv_get_device_list(&num_devs);
    if(dev_list == NULL)
        err_msg("open_dev;ibv_get_device_list", true, base);
    if(dev_num < 0 || dev_num >= num_devs)
        err_msg("open_dev;device index out of range", true, base);

    struct ibv_context *dev_ctx = ibv_open_device(dev_list[dev_num]);
    if(dev_ctx == NULL)
        err_msg("open_dev;ibv_open_device", true, base);
    base->ctx = dev_ctx;
    // The list is only needed to locate the device; the opened context stays valid.
    ibv_free_device_list(dev_list);
    return 0;
}
/*
 * Allocate a protection domain on base->ctx and store it in base->pd.
 * Always returns 0; a missing context or a failed allocation is fatal via
 * err_msg().
 */
int allocate_pd(struct resource_base *base) {
    if(!base->ctx) {
        err_msg("allocate_pd;No device context", true, base);
    }
    struct ibv_pd *domain = ibv_alloc_pd(base->ctx);
    if(!domain) {
        err_msg("allocate_pd;ibv_alloc_pd", true, base);
    }
    base->pd = domain;
    return 0;
}
/*
 * Allocate a buffer of base->mr_size bytes, seed it with PLACEHOLDER and
 * register it as a memory region with local-write, remote-read and
 * remote-write access. On success base->mr_buf_addr and base->mr are set.
 * Always returns 0; every failure is fatal via err_msg().
 *
 * The allocation is now checked before it is written to, and the buffer is
 * recorded in `base` before registration so that cleanup() releases it if
 * ibv_reg_mr fails.
 */
int register_mr(struct resource_base *base) {
    if(base->pd == NULL)
        err_msg("register_mr;No PD allocated", true, base);

    char *buf = (char *) malloc(base->mr_size);
    if(buf == NULL)
        err_msg("register_mr;malloc", true, base);
    strcpy(buf, PLACEHOLDER);
    base->mr_buf_addr = buf;

    const int flags = IBV_ACCESS_REMOTE_WRITE |
                      IBV_ACCESS_REMOTE_READ |
                      IBV_ACCESS_LOCAL_WRITE;
    struct ibv_mr *mr = ibv_reg_mr(base->pd, (void *)buf, base->mr_size, flags);
    if(mr == NULL)
        err_msg("register_mr;ibv_reg_mr", true, base);
    base->mr = mr;
    return 0;
}
/*
 * Query the device attributes into base->dev_attr and create a completion
 * queue with room for 3 entries, stored in base->cq.
 * Always returns 0; every failure is fatal via err_msg().
 */
int init_cq(struct resource_base *base) {
    if(!base->ctx) {
        err_msg("create_cq;No device context", true, base);
    }
    const int query_rc = ibv_query_device(base->ctx, base->dev_attr);
    if(query_rc != 0) {
        err_msg("create_cq;ibv_query_device", true, base);
    }
    struct ibv_cq *queue = ibv_create_cq(base->ctx, 3, NULL, NULL, 0);
    base->cq = queue;
    if(!queue) {
        err_msg("create_cq;ibv_create_cq", true, base);
    }
    return 0;
}
int init_qp(struct resource_base *base) {
struct ibv_qp_init_attr qp_init_attr;
memset(&qp_init_attr, 0, sizeof(qp_init_attr));
qp_init_attr.sq_sig_all = 1;
qp_init_attr.send_cq = base->cq;
qp_init_attr.recv_cq = base->cq;
qp_init_attr.qp_type = IBV_QPT_RC;
qp_init_attr.cap = {
.max_send_wr = 3,
.max_recv_wr = 3,
.max_send_sge = 3,
.max_recv_sge = 3
};
base->qp = ibv_create_qp(base->pd, &qp_init_attr);
if(!base->qp)
D(err_msg("ibv_create_qp", true, base));
//std::cout<<"QP num: "<<base->qp->qp_num<<std::endl;
return 0;
}
void post_send(struct resource_base *base, ibv_wr_opcode opcode) {
struct ibv_send_wr sr;
struct ibv_sge sge;
struct ibv_send_wr *bad_wr;
memset(&sge, 0, sizeof(sge));
sge.addr = (uintptr_t) base->mr_buf_addr;
sge.length = base->mr_size;
sge.lkey = base->mr->lkey;
memset(&sr, 0, sizeof(sr));
sr.next = NULL;
sr.wr_id = 0;
sr.sg_list = &sge;
sr.num_sge = 1;
sr.opcode = opcode;
sr.send_flags = IBV_SEND_SIGNALED;
if(opcode != IBV_WR_SEND) {
sr.wr.rdma.remote_addr = base->remote_conn->addr;
sr.wr.rdma.rkey = base->remote_conn->rkey;
}
if(ibv_post_send(base->qp, &sr, &bad_wr))
D(err_msg("ibv_post_send", true, base));
return;
}
void post_receive(struct resource_base *base) {
struct ibv_recv_wr rr;
struct ibv_sge sge;
struct ibv_recv_wr *bad_wr;
memset(&sge, 0, sizeof(sge));
sge.addr = (uintptr_t) base->mr_buf_addr;
sge.length = base->mr_size;
sge.lkey = base->mr->lkey;
memset(&rr, 0, sizeof(rr));
rr.next = NULL;
rr.wr_id = 0;
rr.sg_list = &sge;
rr.num_sge = 1;
if(ibv_post_recv(base->qp, &rr, &bad_wr))
D(err_msg("ibv_post_recv", true, base));
return;
}
/*
 * Busy-poll base->cq for a single work completion, for at most
 * MAX_POLL_CQ_TIMEOUT milliseconds.
 *
 * Returns 0 when a completion with status IBV_WC_SUCCESS was retrieved, and
 * 1 otherwise (poll error, timeout with an empty queue, or a completion with
 * a failure status).
 *
 * Fixes over the original:
 *  - the start timestamp used tv_usec*1000 instead of tv_sec*1000, so the
 *    elapsed time always exceeded the limit and the loop gave up after the
 *    first poll;
 *  - a negative ibv_poll_cq result was logged but then reported as success.
 */
int poll_completion(struct resource_base *base) {
    struct ibv_wc wc;
    struct timeval cur_time;
    unsigned long start_time_msec;
    unsigned long cur_time_msec;
    int poll_result = -1;

    gettimeofday(&cur_time, NULL);
    start_time_msec = (cur_time.tv_sec*1000) + (cur_time.tv_usec/1000);
    do {
        poll_result = ibv_poll_cq(base->cq, 1, &wc);
        gettimeofday(&cur_time, NULL);
        cur_time_msec = (cur_time.tv_sec*1000) + (cur_time.tv_usec/1000);
    } while(poll_result==0 &&
            ((cur_time_msec-start_time_msec)<MAX_POLL_CQ_TIMEOUT));

    if(poll_result < 0) {
        // The poll itself failed.
        err_msg("ibv_poll_cq", false, base);
        return 1;
    }
    if(poll_result == 0) {
        // Timed out with an empty completion queue.
        return 1;
    }
    if(wc.status != IBV_WC_SUCCESS) {
        // A completion arrived but reports a failed work request.
        return 1;
    }
    return 0;
}
/*
 * Connect base->qp to the peer's queue pair.
 *
 * Steps, in order: open the TCP side channel, exchange conn_data (buffer
 * address, rkey, QP number, LID, GID) with the peer in network byte order,
 * then move the queue pair through the reset->init->RTR->RTS transitions.
 * The side that has a server_name posts a receive before the RTS transition;
 * the side without one posts a send after it. Always returns 0.
 *
 * The caller must have filled base->local_conn->gid and base->port_attr
 * beforehand (see the note below).
 * NOTE(review): the return value of sock_connect() is not checked here.
 */
int connect_qp(struct resource_base *base) {
sock_connect(base);
//std::cout<<sock_connect(base)<<std::endl;
/* ibv_query_gid seems to not be working when called in a wrapper function.
 * Current implementation uses the ibv_query_gid call directly in main.
 */
//union ibv_gid my_gid;
// if(ibv_query_gid(base->ctx, base->ib_port, base->gid_idx, &my_gid))
// D(err_msg("ibv_query_gid", false, base));
//setup exchange data (converted to network byte order before sending)
base->local_conn->addr = htonll((uintptr_t)base->mr_buf_addr);
base->local_conn->rkey = htonl(base->mr->rkey);
base->local_conn->qp_num = htonl(base->qp->qp_num);
base->local_conn->lid = htons(base->port_attr->lid);
//memcpy(&base->local_conn->gid, &my_gid, 16);
//initiate transfer: send local_conn, receive the peer's into remote_conn
sync_remote_qp(base,
(char *)base->local_conn,
(char *)base->remote_conn,
sizeof(conn_data));
// std::cout<<"local qp: "<<base->local_conn->qp_num<<std::endl;
// std::cout<<"remote qp: "<<base->remote_conn->qp_num<<std::endl;
// convert the peer's values back to host byte order (gid is a byte array and is left as is)
base->remote_conn->addr = ntohll(base->remote_conn->addr);
base->remote_conn->rkey = ntohl(base->remote_conn->rkey);
base->remote_conn->qp_num = ntohl(base->remote_conn->qp_num);
base->remote_conn->lid = ntohs(base->remote_conn->lid);
//change states
modify_reset_to_init(base);
modify_init_to_rtr(base);
// connecting side: have a receive posted before the peer's initial send can arrive
if(base->server_name!=NULL) {
post_receive(base);
}
modify_rtr_to_rts(base);
// listening side: send the registered buffer once the QP can transmit
if(base->server_name==NULL) {
post_send(base, IBV_WR_SEND);
}
return 0;
}
#include <string>
#include <byteswap.h>
#include <infiniband/verbs.h>
#ifndef __RDMA_HELPER__
#define __RDMA_HELPER__
// 64-bit host <-> network byte-order conversions.
// On little-endian hosts the value is byte-swapped; on big-endian hosts it
// is returned unchanged. The guard now uses the double-underscore
// __LITTLE_ENDIAN / __BIG_ENDIAN macros so it agrees with __BYTE_ORDER and
// with the #error text below (the un-prefixed names are only defined under
// certain feature-test macros).
#if __BYTE_ORDER == __LITTLE_ENDIAN
static inline uint64_t htonll(uint64_t x) {return bswap_64(x);}
static inline uint64_t ntohll(uint64_t x) {return bswap_64(x);}
#elif __BYTE_ORDER == __BIG_ENDIAN
static inline uint64_t htonll(uint64_t x) {return x;}
static inline uint64_t ntohll(uint64_t x) {return x;}
#else
#error __BYTE_ORDER is neither __LITTLE_ENDIAN nor __BIG_ENDIAN
#endif
// Wraps a statement in do{...}while(0) so it behaves as a single statement
// (used around err_msg() calls after an unbraced `if`).
#define D(x) do{x;}while(0)
// Upper bound, in milliseconds, that poll_completion() keeps polling an
// empty completion queue.
const int MAX_POLL_CQ_TIMEOUT = 2000;
// Default port number assigned to resource_base::ib_port by callers.
const int IB_PORT = 1;
// Default GID table index assigned to resource_base::gid_idx by callers.
const int GID_IDX = 1;
// Connection parameters exchanged with the peer by connect_qp().
// connect_qp() stores addr/rkey/qp_num/lid in network byte order before the
// exchange and converts the received copy back to host order afterwards.
// sizeof(conn_data) bytes are exchanged as-is, so the field order and
// padding are part of the wire format.
struct conn_data {
uint64_t addr; //BUFFER ADDR
uint32_t rkey; //REMOTE KEY
uint32_t qp_num; //QP NUMBER
uint16_t lid; //LOCAL ID
uint8_t gid[16]; //GLOBAL ID
};
// All per-connection state passed to the helper functions declared below.
struct resource_base {
struct ibv_context *ctx; // device context (passed to ibv_query_port/ibv_query_gid)
struct ibv_device_attr *dev_attr;
struct ibv_port_attr *port_attr; // port attributes; connect_qp() reads ->lid
struct ibv_pd *pd; // protection domain
struct ibv_cq *cq; // completion queue polled by poll_completion()
struct ibv_qp *qp; // queue pair driven by the modify_* state helpers
struct ibv_mr *mr; // memory region; supplies lkey/rkey
struct conn_data *remote_conn; // peer's parameters, filled by connect_qp()
struct conn_data *local_conn; // our parameters, advertised by connect_qp()
char *mr_buf_addr; // start of the registered buffer
uint32_t mr_size; // size of the registered buffer in bytes
enum ibv_mtu mtu; // used as path_mtu for the RTR transition
short int min_rnr_timer; // used for the RTR transition
short int timeout; // used for the RTS transition
short int retry_cnt; // used for the RTS transition
short int ib_port; // port number used for port/GID queries and QP setup
short int gid_idx; // GID index; values >= 0 make the RTR transition use a GRH
short int local_sock_fd; // presumably the out-of-band socket fd -- TODO confirm
short int conn_fd; // presumably the connected/accepted socket fd -- TODO confirm
short int mode; // callers set 1 or 2; meaning is not visible here -- TODO confirm
char *server_name; // peer host; NULL vs non-NULL selects the role in connect_qp()
int port; // TCP port for the out-of-band socket (set by callers)
};
// Helper API operating on a resource_base.
// Callers in this project invoke the setup helpers in the order
// init_resources, open_dev, allocate_pd, register_mr, init_cq, init_qp and
// then connect_qp.

// Reports `msg`; callers pass to_exit == true for failures they treat as fatal.
void err_msg(std::string msg, bool to_exit, struct resource_base *base);
void cleanup(struct resource_base *base);
void init_resources(struct resource_base *base);
bool query_port(struct resource_base *base);
int sock_connect(struct resource_base *base);
// Exchanges `size` bytes with the peer: local_data is what we send,
// remote_data receives the peer's bytes (as used by connect_qp()).
int sync_remote_qp(struct resource_base *base, char *local_data, char *remote_data, int size);
int open_dev(struct resource_base *base, short int dev_num);
int allocate_pd(struct resource_base *base);
int register_mr(struct resource_base *base);
int init_cq(struct resource_base *base);
int init_qp(struct resource_base *base);
// Posts one send-side work request with the given opcode.
void post_send(struct resource_base *base, ibv_wr_opcode opcode);
// Posts one receive covering the whole registered buffer.
void post_receive(struct resource_base *base);
// Returns 0 on a successful completion, 1 on timeout or bad completion.
int poll_completion(struct resource_base *base);
// Exchanges connection data with the peer and moves the QP to RTS.
int connect_qp(struct resource_base *base);
#endif
#include <infiniband/verbs.h>
#include "rdma_helper.hpp"
#include "rdma_states.hpp"
int reset_state(struct resource_base *base) {
int flags = 0;
//REREGISTER MR : REREGISTER THE ACCESS TYPE
flags = IBV_ACCESS_REMOTE_WRITE |
IBV_ACCESS_REMOTE_READ |
IBV_ACCESS_LOCAL_WRITE;
if(ibv_rereg_mr(base->mr, IBV_REREG_MR_CHANGE_ACCESS, base->pd, base->mr_buf_addr, base->mr_size, flags))
D(err_msg("reset_state", true, base));
//RESET QP : CHANGE STATE TO RESET, RESET CAP
flags = 0;
struct ibv_qp_attr reset_attr;
memset(&reset_attr, 0, sizeof(reset_attr));
reset_attr.qp_state = IBV_QPS_RESET;
reset_attr.cap = {
.max_send_wr = 1,
.max_recv_wr = 1,
.max_send_sge = 1,
.max_recv_sge = 1
};
flags = IBV_QP_STATE | IBV_QP_CAP;
if(ibv_modify_qp(base->qp, &reset_attr, flags))
D(err_msg("reset_state", true, base));
return 0;
}
int modify_reset_to_init(struct resource_base *base) {
struct ibv_qp_attr init_attr;
memset(&init_attr, 0, sizeof(init_attr));
init_attr.qp_state = IBV_QPS_INIT;
init_attr.pkey_index = 0;
init_attr.port_num = base->ib_port;
init_attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE |
IBV_ACCESS_REMOTE_READ |
IBV_ACCESS_LOCAL_WRITE;
int flags = IBV_QP_STATE |
IBV_QP_PKEY_INDEX |
IBV_QP_PORT |
IBV_QP_ACCESS_FLAGS;
if(ibv_modify_qp(base->qp, &init_attr, flags))
D(err_msg("modify_reset_to_init", true, base));
return 0;
}
int modify_init_to_rtr(struct resource_base *base) {
struct ibv_qp_attr rtr_attr;
memset(&rtr_attr, 0, sizeof(rtr_attr));
rtr_attr.qp_state = IBV_QPS_RTR;
rtr_attr.path_mtu = base->mtu;
rtr_attr.dest_qp_num = base->remote_conn->qp_num;
rtr_attr.rq_psn = 0;
rtr_attr.max_dest_rd_atomic = 1;
rtr_attr.min_rnr_timer = base->min_rnr_timer;
rtr_attr.ah_attr.is_global = 0;
rtr_attr.ah_attr.dlid = base->remote_conn->lid;
rtr_attr.ah_attr.sl = 0;
rtr_attr.ah_attr.src_path_bits = 0;
rtr_attr.ah_attr.port_num = base->ib_port;
if(base->gid_idx >= 0) {
rtr_attr.ah_attr.is_global = 1;
rtr_attr.ah_attr.port_num = base->ib_port;
memcpy(&rtr_attr.ah_attr.grh.dgid, base->remote_conn->gid, 16);
rtr_attr.ah_attr.grh.flow_label = 0;
rtr_attr.ah_attr.grh.hop_limit = 1;
rtr_attr.ah_attr.grh.sgid_index = base->gid_idx;
rtr_attr.ah_attr.grh.traffic_class = 0;
}
int flags = IBV_QP_STATE |
IBV_QP_AV |
IBV_QP_PATH_MTU |
IBV_QP_DEST_QPN |
IBV_QP_RQ_PSN |
IBV_QP_MAX_DEST_RD_ATOMIC |
IBV_QP_MIN_RNR_TIMER;
if(ibv_modify_qp(base->qp, &rtr_attr, flags))
D(err_msg("modify_init_to_rtr", true, base));
return 0;
}
int modify_rtr_to_rts (struct resource_base *base) {
struct ibv_qp_attr rts_attr;
memset(&rts_attr, 0, sizeof(rts_attr));
rts_attr.qp_state = IBV_QPS_RTS;
rts_attr.timeout = base->timeout;
rts_attr.retry_cnt = base->retry_cnt;
rts_attr.rnr_retry = 0;
rts_attr.sq_psn = 0;
rts_attr.max_rd_atomic = 1;
int flags = IBV_QP_STATE |
IBV_QP_TIMEOUT |
IBV_QP_RETRY_CNT |
IBV_QP_RNR_RETRY |
IBV_QP_SQ_PSN |
IBV_QP_MAX_QP_RD_ATOMIC;
if(ibv_modify_qp(base->qp, &rts_attr, flags))
D(err_msg("modify_rtr_to_rts", true, base));
return 0;
}
#include <infiniband/verbs.h>
#include "rdma_helper.hpp"
#ifndef __RDMA_STATES__
#define __RDMA_STATES__
// Queue-pair state helpers. Each one reports a failing verbs call through
// err_msg(..., true, base) and otherwise returns 0.

// Re-registers the MR access flags and moves the QP back to RESET.
int reset_state(struct resource_base *base);
// RESET -> INIT.
int modify_reset_to_init(struct resource_base *base);
// INIT -> RTR; needs base->remote_conn to be filled in.
int modify_init_to_rtr(struct resource_base *base);
// RTR -> RTS.
int modify_rtr_to_rts(struct resource_base *base);
#endif
\ No newline at end of file
#include <iostream>
#include <unistd.h>
#include <sys/time.h>
#include <string.h>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "metadata.hpp"
// Value stored in resource_base::mode by main().
const short int MODE = 2;
// NOTE(review): string literals bound to non-const char* (ill-formed since
// C++11, accepted by g++ with a warning). Neither address is referenced in
// the code of this program that follows.
char *SERVER_HOST = "192.168.200.20";
char *SERVER_NIC = "192.168.200.21";
// TCP port stored in resource_base::port by main().
const int NIC_PORT = 8080;
// Device index passed to open_dev().
const short int dev_num = 0;
/**
 * Placeholder for per-object log handling: counts to 100 and returns.
 * It has no observable effect.
 */
void dummy_loghandler() {
    int spins = 0;
    while(spins < 100)
        ++spins;
}
/**
 * Host-side benchmark driver.
 *
 * Brings up the RDMA resources, connects to the peer, then loops on
 * one-byte control messages exchanged with sync_remote_qp():
 *   - 'W' from the peer triggers an RDMA read of the peer's buffer; once
 *     the object key in the local buffer changes, 'S' is sent back and the
 *     elapsed time is accumulated;
 *   - 'X' from the peer ends the loop.
 * Finally prints the object size, the number of objects received and the
 * average time per read in milliseconds.
 */
int main(int argc, char * argv[]) {
    char *temp = (char *) malloc(128);
    // Writable one-byte control messages (sync_remote_qp takes char*, so
    // string literals are not passed directly).
    char msg_ready[] = "R";
    char msg_tick[]  = "T";
    char msg_done[]  = "S";

    struct resource_base *base;
    base = (struct resource_base*) malloc(sizeof(struct resource_base));
    init_resources(base);
    base->mode = MODE;
    base->ib_port = IB_PORT;
    base->gid_idx = GID_IDX;
    base->port = NIC_PORT;
    open_dev(base, dev_num);
    allocate_pd(base);
    register_mr(base);
    init_cq(base);
    init_qp(base);

    // Port attributes live on this stack frame; base only borrows them.
    struct ibv_port_attr port_attr;
    if(ibv_query_port(base->ctx, base->ib_port, &port_attr))
        D(err_msg("ibv_query_port", true, base));
    if(port_attr.state != IBV_PORT_ACTIVE)
        D(err_msg("IB PORT NOT ACTIVE", true, base));
    base->port_attr = &port_attr;

    // The GID is queried here rather than inside connect_qp().
    union ibv_gid my_gid;
    if(ibv_query_gid(base->ctx, base->ib_port, base->gid_idx, &my_gid))
        D(err_msg("ibv_query_gid", true, base));
    memcpy(base->local_conn->gid, &my_gid, 16);

    connect_qp(base);
    strcpy(base->mr_buf_addr, "Hi from server");
    sync_remote_qp(base, msg_ready, temp, 1);
    post_send(base, IBV_WR_RDMA_WRITE);
    temp[0] = 'A';

    int r_cnt = 0;
    long long int prev = -1;
    struct timeval temp_time;
    double s_time, e_time;
    double avg_time = 0;

    union object *sent_obj = (union object *) base->mr_buf_addr;
    sent_obj->obj.key = 0;
    // The buffer is filled by the RDMA read behind the compiler's back, so
    // the spin-wait below must re-load the key from memory on every pass.
    const volatile auto *live_key = &sent_obj->obj.key;

    while(temp[0]!='X') {
        sync_remote_qp(base, msg_tick, temp, 1);
        if(temp[0]=='W') {
            gettimeofday(&temp_time, NULL);
            s_time = (double)temp_time.tv_sec*1000 + (double)temp_time.tv_usec/1000;
            post_send(base, IBV_WR_RDMA_READ);
            if(poll_completion(base))
                D(err_msg("IBV_WR_RDMA_READ", true, base));
            // Wait until the freshly read object (with a new key) is visible.
            while(prev == *live_key);
            dummy_loghandler();
            prev = sent_obj->obj.key;
            gettimeofday(&temp_time, NULL);
            e_time = (double)temp_time.tv_sec*1000 + (double)temp_time.tv_usec/1000;
            avg_time += (e_time - s_time);
            sync_remote_qp(base, msg_done, temp, 1);
            r_cnt++;
        }
    }

    // Avoid printing NaN when no object was received.
    if(r_cnt > 0)
        avg_time /= (double)r_cnt;
    std::cout<<"Size of Object: "<<sizeof(union object)<<std::endl;
    std::cout<<"Objects received: "<<r_cnt<<std::endl;
    std::cout<<"Avg. RDMA Read time: "<<avg_time<<" ms"<<std::endl;

    cleanup(base);
    free(temp);
    return 0;
}
#include <iostream>
#include <sys/time.h>
#include <unordered_map>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
// Number of server connections main() sets up and writes each object to.
const int num_servers = 2;
const short int MODE = 2;
// NOTE(review): the address strings below are string literals bound to
// non-const char* (accepted by g++ with a warning); they are assigned to
// resource_base::server_name, which is also a plain char*.
char *SERVER_HOST[num_servers] = {
"192.168.200.20",
"192.168.200.50"
};
char *CLIENT_HOST = "192.168.200.40";
char *SERVER_NIC = "192.168.200.21";
// TCP ports stored in resource_base::port for the server and client links.
const int SERVER_PORT = 8080;
const int CLIENT_PORT = 8090;
// Device index passed to open_dev().
const short int dev_num = 0;
// Probability threshold used by populate_cache_meta() to mark an entry invalid.
const double err_fraction = 0.0;
// Template metadata entries copied into obj_table by populate_cache_meta().
struct object_metadata obj_metas[cache_meta_size];
// Metadata table looked up by object key in main().
std::unordered_map<long long int, struct object_metadata> obj_table;
void populate_cache_meta(std::unordered_map<long long, struct object_metadata>& m) {
for(int i=0; i<cache_meta_size; i++) {
m[i] = obj_metas[i];
m[i].key = i;
if((double)rand()/RAND_MAX <= err_fraction) m[i].valid = false;
else m[i].valid = true;
}
}
// NIC-side benchmark driver.
// Sets up one RDMA connection per entry in SERVER_HOST plus one to
// CLIENT_HOST, then for cache_meta_size iterations: takes a one-byte request
// from the client, RDMA-reads an object from it, and -- when the object's
// key is marked valid in obj_table -- copies the object into every server
// buffer, signals 'W' to each server, counts 'S' acknowledgements and
// answers the client with a Status value. Prints counters and the average
// NIC-to-host time at the end.
int main(int argc, char * argv[]) {
srand(time(NULL));
// NOTE(review): temp is never freed before main returns.
char *temp = (char *) malloc(128);
//nic connects to servers
struct resource_base *server_base[num_servers];
struct ibv_port_attr server_port_attr[num_servers];
union ibv_gid server_gid[num_servers];
for (int i = 0; i < num_servers; i++) {
server_base[i] = (struct resource_base*) malloc(sizeof(struct resource_base));
init_resources(server_base[i]);
server_base[i]->mode = 1;
server_base[i]->ib_port = 1;
server_base[i]->gid_idx = 1;
server_base[i]->server_name = SERVER_HOST[i];
server_base[i]->port = SERVER_PORT;
open_dev(server_base[i], dev_num);
allocate_pd(server_base[i]);
register_mr(server_base[i]);
init_cq(server_base[i]);
init_qp(server_base[i]);
if(ibv_query_port(server_base[i]->ctx, server_base[i]->ib_port, &server_port_attr[i]))
D(err_msg("ibv_query_port", true, server_base[i]));
if(server_port_attr[i].state != IBV_PORT_ACTIVE)
D(err_msg("IB PORT NOT ACTIVE", true, server_base[i]));
// The resource_base only borrows the stack-allocated port attributes.
server_base[i]->port_attr = &server_port_attr[i];
// GID is queried here, not inside connect_qp().
if(ibv_query_gid(server_base[i]->ctx, server_base[i]->ib_port, server_base[i]->gid_idx, &server_gid[i]))
D(err_msg("ibv_query_gid", true, server_base[i]));
memcpy(server_base[i]->local_conn->gid, &server_gid[i], 16);
connect_qp(server_base[i]);
sync_remote_qp(server_base[i], "T", temp, 1);
// Retries until poll_completion() returns 0; spins forever if it never does.
while(poll_completion(server_base[i])); //BAD PRACTICE BUT NEED THIS TEMPORARILY
//D(err_msg("IBV_WR_SEND", false, server_base[i]));
}
//nic connects to client
struct resource_base *client_base;
struct ibv_port_attr client_port_attr;
union ibv_gid client_gid;
client_base = (struct resource_base*) malloc(sizeof(struct resource_base));
init_resources(client_base);
client_base->mode = 1;
client_base->ib_port = 1;
client_base->gid_idx = 1;
client_base->server_name = CLIENT_HOST;
client_base->port = CLIENT_PORT;
open_dev(client_base, dev_num);
allocate_pd(client_base);
register_mr(client_base);
init_cq(client_base);
init_qp(client_base);
if(ibv_query_port(client_base->ctx, client_base->ib_port, &client_port_attr))
D(err_msg("ibv_query_port", true, client_base));
if(client_port_attr.state != IBV_PORT_ACTIVE)
D(err_msg("IB PORT NOT ACTIVE", true, client_base));
client_base->port_attr = &client_port_attr;
if(ibv_query_gid(client_base->ctx, client_base->ib_port, client_base->gid_idx, &client_gid))
D(err_msg("ibv_query_gid", true, client_base));
memcpy(client_base->local_conn->gid, &client_gid, 16);
connect_qp(client_base);
sync_remote_qp(client_base, "T", temp, 1);
while(poll_completion(client_base)); //BAD PRACTICE BUT NEED THIS TEMPORARILY
//D(err_msg("IBV_WR_SEND, client", false, client_base));
//client_base->mr_buf_addr = (char *) malloc(client_base->mr_size);
//sock_connect(client_base);
//char *tp = (char *) malloc(128);
//sync_remote_qp(client_base, "Hi from client\0", client_base->mr_buf_addr, 15);
//std::cout<<"CLIENT SENT"<<std::endl;
//std::cout<<client_base->mr_buf_addr<<std::endl;
//std::cout<<"NIC CONNECTED TO CLIENT"<<std::endl;
int err_cnt = 0;
int succ_cnt = 0;
populate_cache_meta(obj_table);
//obj_table[0].valid = false;
union object sent_obj;
enum Status send_succ = STATUS_OK;
enum Status send_err = STATUS_WRONG_VERSION;
struct timeval temp_time;
double s_time, e_time;
double avg_time_diff=0.0;
int ack_cnt;
// One iteration per expected client request.
for(int i=0; i<cache_meta_size; i++) {
memset(&sent_obj, 0, sizeof(sent_obj));
read_obj(client_base, temp, 1);
// 'R' from the client triggers an RDMA read of the client's buffer.
// NOTE(review): the rest of the iteration runs even when temp[0] != 'R'.
if(temp[0]=='R')
post_send(client_base, IBV_WR_RDMA_READ);
sync_remote_qp(client_base, "T", temp, 1);
while(poll_completion(client_base)); //BAD PRACTICE BUT NEED THIS TEMPORARILY
//D(err_msg("client obj poll completion", false, client_base));
memcpy((void*)&sent_obj, (void*)client_base->mr_buf_addr, sizeof(sent_obj));
//std::cout<<"Object got with key: "<<sent_obj.obj.key<<std::endl;
// operator[] default-inserts an entry for keys not already in obj_table.
if(obj_table[sent_obj.obj.key].valid) {
// Stage the object in every server's registered buffer.
for(resource_base *server: server_base)
memcpy((void*)server->mr_buf_addr, (void*)&sent_obj, sizeof(sent_obj));
gettimeofday(&temp_time, NULL);
s_time = ((double)temp_time.tv_sec*1000 + (double)temp_time.tv_usec/1000);
//sync_remote_qp(server_base, "W", temp, 1); //prepare
//post_send(server_base, IBV_WR_RDMA_WRITE);
// if(poll_completion(server_base))
// D(err_msg("IBV_WR_RDMA_WRITE", true, server_base));
ack_cnt = 0;
// Tell every server an object is ready ('W')...
for(resource_base *server: server_base) {
sync_remote_qp(server, "W", temp, 1);
}
// ...then collect one acknowledgement per server ('S' counts as success).
for(resource_base *server: server_base) {
sync_remote_qp(server, "T", temp, 1); //ack
if(temp[0]=='S') ack_cnt++;
temp[0] = 'T';
}
// NOTE(review): when fewer than num_servers acks arrive, nothing is sent
// back to the client for this request.
if(ack_cnt==num_servers) {
gettimeofday(&temp_time, NULL);
e_time = ((double)temp_time.tv_sec*1000 + (double)temp_time.tv_usec/1000);
avg_time_diff += (e_time - s_time);
succ_cnt++;
send_obj(client_base, (char *)&send_succ, sizeof(send_succ));
}
//sync_remote_qp(server_base, "T", temp, 1); //ack
// if(temp[0]=='S') {
// gettimeofday(&temp_time, NULL);
// e_time = ((double)temp_time.tv_sec*1000 + (double)temp_time.tv_usec/1000);
// avg_time_diff += (e_time - s_time);
// succ_cnt++;
// send_obj(client_base, (char *)&send_succ, sizeof(send_succ));
// }
}
else {
send_obj(client_base, (char *)&send_err, sizeof(send_err));
err_cnt++;
}
}
// NOTE(review): floating-point division by zero when succ_cnt == 0; the
// printed average is then not a number.
avg_time_diff /= (double)succ_cnt;
std::cout<<"Errored requests: "<<err_cnt<<std::endl;
std::cout<<"Successful requests: "<<succ_cnt<<std::endl;
std::cout<<"Nic to Host obj transfer time: "<<avg_time_diff<<" ms"<<std::endl;
// 'X' tells each server to leave its receive loop.
for(resource_base *server: server_base)
sync_remote_qp(server, "X", temp, 1);
//for(int i=0; i<1000; i++) sync_remote_qp(client_base, "T", tp, 1);
for(resource_base *server: server_base)
cleanup(server);
cleanup(client_base);
return 0;
}
#include <iostream>
#include <stdio.h>
#include <string>
#include <vector>
#include "../include/cli_api.hpp"
#include "../include/common.hpp"
#include "../include/client_functions.hpp"
#include "../include/connection_pool.hpp"
#include "../include/dispatcher.hpp"
#include "../include/thread_pool.hpp"
#include "../config/read_config.hpp"
using namespace std;
// Peer addresses to connect to; the entry is looked up in
// machine_allocation_ips (declared outside this file).
vector<string> conn_addrs = {
machine_allocation_ips["ub-04-nic"]
};
// Default connection port; main() overwrites it from the config file.
int conn_port = 8888;
/**
 * Host-side smoke test for the transport layer.
 *
 * Reads ../config/server_config.conf, connects to conn_addrs[0], then
 * alternates TCP sends/receives with one one-sided RDMA read, printing
 * whatever it receives.
 *
 * @return 0 on success, -1 if the RDMA read or the CQ poll fails.
 */
int main() {
    string tcp_test2("Hey this is host");
    string test_string("Hello this is host");

    Params parameters("../config/server_config.conf");
    parameters.read_params();
    parameters.print_vals();
    debug = parameters.debug;
    analyze = parameters.analyze;
    max_packet_size_bytes = parameters.max_packet_size_bytes;
    max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
    conn_port = parameters.conn_port;

    Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
    dispatcher.add_connection(conn_addrs[0], conn_port, parameters);
    printf("Connected to nic\n");

    RDMA_Transport *rdma_transport = dispatcher.conn_pool->rdma_connection_pool[0];
    printf("%p\n", rdma_transport);
    TCP_Transport *tcp_transport = rdma_transport->get_tcp_conn();

    //send tcp
    printf("Now sending tcp\n");
    tcp_transport->set_mr(test_string, test_string.size());
    tcp_transport->send_data();

    //recv tcp (buf is released with free() before every reuse)
    char *buf = NULL;
    tcp_transport->recv_data(&buf);
    printf("Received: %s\n", buf);

    //one sided read
    if(rdma_transport->one_sided_read()) {
        // The message used to say "write" although a read is issued.
        perror("One sided read error");
        free(buf);
        return -1;
    }
    //poll cq
    if(rdma_transport->poll_cq()) {
        perror("CQ err");
        free(buf);
        return -1;
    }
    printf("Received via RDMA: %s\n", rdma_transport->get_mr_addr());

    //recv tcp
    free(buf);
    buf = NULL;
    tcp_transport->recv_data(&buf);
    printf("Received: %s\n", buf);

    //set mr
    rdma_transport->copy_to_mr(test_string.c_str());

    //send tcp
    tcp_transport->set_mr(tcp_test2, tcp_test2.size());
    tcp_transport->send_data();

    //recv tcp
    free(buf);
    buf = NULL;
    tcp_transport->recv_data(&buf);
    printf("Received: %s\n", buf);

    // Release the last receive buffer as well.
    free(buf);
    return 0;
}
#include <iostream>
#include <stdio.h>
#include <string>
#include <vector>
#include "../include/cli_api.hpp"
#include "../include/common.hpp"
#include "../include/client_functions.hpp"
#include "../include/connection_pool.hpp"
#include "../include/dispatcher.hpp"
#include "../include/thread_pool.hpp"
#include "../config/read_config.hpp"
using namespace std;
// Peer address looked up in machine_allocation_ips (declared outside this
// file). main() below calls add_connection() with an empty address instead
// of using this list.
vector<string> conn_addrs = {
machine_allocation_ips["ub-05"]
};
// Default connection port; main() overwrites it from the config file.
int conn_port = 8888;
/**
 * NIC-side smoke test for the transport layer.
 *
 * Reads ../config/nic_config.conf, adds a connection with an empty address,
 * then alternates TCP receives/sends with one one-sided RDMA read.
 *
 * @return 0 on success, -1 if the RDMA read or the CQ poll fails.
 */
int main() {
    string tcp_test_str("Hello this is tcp conn");
    string test_string("Hello this is NIC");

    Params parameters("../config/nic_config.conf");
    parameters.read_params();
    parameters.print_vals();
    debug = parameters.debug;
    analyze = parameters.analyze;
    max_packet_size_bytes = parameters.max_packet_size_bytes;
    max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
    conn_port = parameters.conn_port;

    Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
    dispatcher.add_connection(string(), conn_port, parameters);
    printf("Connected to host\n");

    RDMA_Transport *rdma_transport = dispatcher.conn_pool->rdma_connection_pool[0];
    TCP_Transport *tcp_transport = rdma_transport->get_tcp_conn();

    //recv data (buf is released with free() before every reuse)
    char *buf = NULL;
    tcp_transport->recv_data(&buf);
    printf("Received: %s\n", buf);

    //set mr
    rdma_transport->copy_to_mr(test_string.c_str());

    //send tcp (twice, matching the two receives on the host side)
    tcp_transport->set_mr(tcp_test_str, tcp_test_str.size());
    tcp_transport->send_data();
    tcp_transport->set_mr(tcp_test_str, tcp_test_str.size());
    tcp_transport->send_data();

    //recv tcp
    free(buf);
    buf = NULL;
    tcp_transport->recv_data(&buf);

    //one sided read
    if(rdma_transport->one_sided_read()) {
        // The message used to say "write" although a read is issued.
        perror("One sided read error");
        free(buf);
        return -1;
    }
    //poll cq
    if(rdma_transport->poll_cq()) {
        perror("CQ err");
        free(buf);
        return -1;
    }
    printf("Received via RDMA: %s\n", rdma_transport->get_mr_addr());

    //send tcp
    tcp_transport->set_mr(tcp_test_str, tcp_test_str.size());
    tcp_transport->send_data();

    // Release the last receive buffer as well.
    free(buf);
    return 0;
}
#include <chrono>
#include <iostream>
#include <thread>
#include <stdio.h>
#include <string>
#include <vector>
#include "include/cli_api.hpp"
#include "include/common.hpp"
#include "include/client_functions.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using namespace std;
// Requests per second; main() overwrites it from the config file and
// request_thread() sleeps 1/arrival_rate seconds between requests.
double arrival_rate = 0.0;
vector<string> conn_addrs = {
machine_allocation_ips["ub-04-nic"] //the nic
};
// Default connection port; main() overwrites it from the config file.
int conn_port = 8888;
// Request payload, generated once in main().
string key, value;
// Number of characters main() puts into `key`.
size_t key_length = 64;
enum Entity self_id = CLIENT;
/**
 * Issue 500 write requests on the first RDMA connection, pausing
 * 1/arrival_rate seconds after each one.
 *
 * @param dispatcher  dispatcher whose first pooled connection is used.
 * @param value_size  number of bytes of the global `value` to send.
 * @return always NULL.
 */
void* request_thread(Dispatcher dispatcher, size_t value_size) {
    // A non-positive rate would make 1/arrival_rate infinite or negative,
    // which is not a usable sleep duration; send back-to-back in that case.
    const bool paced = arrival_rate > 0.0;
    const chrono::duration<double> interval(paced ? 1/(arrival_rate) : 0.0);

    for(int i=0; i<500; i++) {
        //put out a request
        write_send_request(dispatcher.conn_pool->rdma_connection_pool[0],
                           key.c_str(),
                           key_length,
                           value.c_str(),
                           value_size);
        //sleep for interval
        if(paced)
            this_thread::sleep_for(interval);
    }
    return NULL;
}
/**
 * Collect max_req responses from the first RDMA connection by calling
 * write_get_response() once per expected response.
 *
 * @return always NULL.
 */
void* response_thread(Dispatcher dispatcher) {
    int handled = 0;
    while(handled < max_req) {
        write_get_response(dispatcher.conn_pool->rdma_connection_pool[0]);
        ++handled;
    }
    return NULL;
}
int main() {
srand(time(NULL));
char* ipstr = (char*) malloc(max_ip_cmd_len);
vector<string> tokens;
Params parameters("config/threaded_client.conf");
parameters.read_params();
debug = parameters.debug;
analyze = parameters.analyze;
max_packet_size_bytes = parameters.max_packet_size_bytes;
max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
conn_port = parameters.conn_port;
arrival_rate = parameters.arrival_rate;
if(debug) {
parameters.print_vals();
}
// make key
for(int i=0; i<key_length; i++) {
key.push_back((char)(rand()%256));
}
for(int i=0; i<parameters.rdma_mr_size_bytes-150; i++) {
value.push_back((char)(i%256));
}
Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
for(string conn_addr: conn_addrs) {
//dispatcher.add_connection(conn_addr, conn_port);
dispatcher.add_connection(conn_addrs[0], conn_port, parameters);
if(debug) {
dispatcher.conn_pool->rdma_connection_pool.back()->check_rdma_onesided();
}
}
if(debug) {
printf("Connected\n");
}
thread req_thrd(request_thread, dispatcher, parameters.rdma_mr_size_bytes-150);
thread resp_thrd(response_thread, dispatcher);
req_thrd.join();
resp_thrd.join();
if(analyze) {
for(chrono::duration<double> d: request_queue_wait_time) {
request_queue_wait_time_sum += d.count();
}
for(chrono::duration<double> d: response_buffer_wait_time) {
response_buffer_wait_time_sum += d.count();
}
for(chrono::duration<double> d: send_queue_wait_time) {
send_queue_wait_time_sum += d.count();
}
for(chrono::duration<double> d: client_rtt_time) {
client_rtt_time_sum += d.count();
}
for(chrono::duration<double> d: rdma_one_sided_read_time) {
rdma_one_sided_read_time_sum += d.count();
}
for(chrono::duration<double> d: rdma_one_sided_write_time) {
rdma_one_sided_write_time_sum += d.count();
}
cout<<"Set Arrival Rate: "<<parameters.arrival_rate<<" requests/second"<<endl;
cout<<"Request Queue Wait Time Avg: "<<request_queue_wait_time_sum/request_queue_wait_time.size()<<" seconds"<<endl;
cout<<"Response Buffer Wait Time Avg: "<<response_buffer_wait_time_sum/response_buffer_wait_time.size()<<" seconds"<<endl;
cout<<"Send Queue Wait Time Avg: "<<send_queue_wait_time_sum/send_queue_wait_time.size()<<" seconds"<<endl;
cout<<"Avg RDMA one sided read time: "<<rdma_one_sided_read_time_sum/rdma_one_sided_read_time.size()<<" seconds"<<endl;
cout<<"Avg RDMA one sided write time: "<<rdma_one_sided_write_time_sum/rdma_one_sided_write_time.size()<<" seconds"<<endl;
cout<<"Client RTT Avg: "<<client_rtt_time_sum/client_rtt_time.size()<<" seconds"<<endl;
}
return 0;
}
\ No newline at end of file
#include <chrono>
#include <iostream>
#include <thread>
#include <random>
#include <stdio.h>
#include <string>
#include <vector>
#include "include/cli_api.hpp"
#include "include/common.hpp"
#include "include/client_functions.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
#include "include/thread_functions.hpp"
using namespace std;
// Requests per second; main() overwrites it from the config file and
// request_manager() sleeps 1/arrival_rate seconds between requests.
double arrival_rate = 0.0;
vector<string> conn_addrs = {
machine_allocation_ips["ub-04-nic"] //the nic
};
// Number of connections (and worker threads) main() creates.
int num_connections = 4;
// Default connection port; main() overwrites it from the config file.
int conn_port = 8888;
// Request payload, generated once in main().
string key, value;
// Number of characters main() puts into `key`.
size_t key_length = 64;
enum Entity self_id = CLIENT;
/**
 * Build 500 WRITE request packets and enqueue them on request_queue,
 * pausing 1/arrival_rate seconds after each one.
 *
 * Packet layout: Common_Request header, then key_length key bytes and one
 * zero byte, then value_size value bytes and one zero byte.
 *
 * @param dispatcher      unused here; kept for the thread entry signature.
 * @param value_size      number of bytes of the global `value` to send.
 * @param transport_type  stored in every job.
 * @return always NULL. Stops early if a packet cannot be allocated.
 */
void* request_manager(Dispatcher dispatcher, size_t value_size, uint8_t transport_type) {
    // A non-positive rate would make 1/arrival_rate infinite or negative,
    // which is not a usable sleep duration; enqueue back-to-back in that case.
    const bool paced = arrival_rate > 0.0;
    const chrono::duration<double> interval(paced ? 1/(arrival_rate) : 0.0);

    const size_t payload_size = key_length + 1 + value_size + 1;
    const size_t final_size = sizeof(Common_Request) + payload_size;

    for(int i=0; i<500; i++) {
        char* packet = (char*) malloc(final_size);
        if(packet == NULL) {
            perror("request_manager: packet allocation failed");
            return NULL;
        }
        // Zero-fill so the separator bytes after key and value are zero.
        memset(packet, 0, final_size);
        memcpy(packet+sizeof(Common_Request), key.c_str(), key_length);
        memcpy(packet+sizeof(Common_Request)+key_length+1, value.c_str(), value_size);

        // The header overlays the start of the packet.
        Common_Request* cr = (Common_Request*) packet;
        cr->opcode = WRITE;
        cr->service_type = MASTER_SERVICE;
        cr->type = TYPE_REQUEST;
        cr->req.w_request.common.opcode = WRITE;
        cr->req.w_request.length = payload_size;

        // The job and its packet are handed over to request_queue here.
        job_context* job = new job_context();
        job->opcode = WRITE;
        job->job_type = TYPE_REQUEST;
        job->request = cr;
        job->request_packet = packet;
        job->service_type = MASTER_SERVICE;
        job->transport_type = transport_type;
        job->job_post_time = chrono::steady_clock::now();
        request_queue->enqueue(job);

        if(paced)
            this_thread::sleep_for(interval);
    }
    return NULL;
}
int main() {
srand(time(NULL));
random_device rd;
mt19937 gen(rd());
uniform_int_distribution<> distr(1, 255);
char* ipstr = (char*) malloc(max_ip_cmd_len);
vector<string> tokens;
thread worker_threads[num_connections];
Params parameters("config/threaded_client.conf");
parameters.read_params();
debug = parameters.debug;
analyze = parameters.analyze;
max_packet_size_bytes = parameters.max_packet_size_bytes;
max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
conn_port = parameters.conn_port;
arrival_rate = parameters.arrival_rate;
if(debug) {
parameters.print_vals();
}
Dispatcher dispatcher(parameters.transport_type, parameters.num_threads, client_worker_function);
for(int i=0; i<num_connections; i++) {
dispatcher.add_connection(conn_addrs[0], conn_port, parameters);
if(debug) {
printf("Connection #%d made\n", i);
}
if(debug) {
dispatcher.conn_pool->rdma_connection_pool.back()->check_rdma_onesided();
}
}
if(debug) {
printf("Connected\n");
}
for(int i=0; i<key_length; i++) {
key.push_back((char)(distr(gen)));
}
for(int i=0; i<parameters.rdma_mr_size_bytes-150; i++) {
value.push_back((char)(distr(gen)));
}
for(int i=0; i < num_connections; i++) {
worker_threads[i] = thread(client_worker_function, dispatcher.conn_pool->rdma_connection_pool[i]);
}
thread sender_thread(request_manager, dispatcher, parameters.rdma_mr_size_bytes-150, parameters.transport_type);
sender_thread.join();
if(analyze) {
for(chrono::duration<double> d: request_queue_wait_time) {
request_queue_wait_time_sum += d.count();
}
for(chrono::duration<double> d: response_buffer_wait_time) {
response_buffer_wait_time_sum += d.count();
}
for(chrono::duration<double> d: send_queue_wait_time) {
send_queue_wait_time_sum += d.count();
}
for(chrono::duration<double> d: client_rtt_time) {
client_rtt_time_sum += d.count();
}
for(chrono::duration<double> d: rdma_one_sided_read_time) {
rdma_one_sided_read_time_sum += d.count();
}
for(chrono::duration<double> d: rdma_one_sided_write_time) {
rdma_one_sided_write_time_sum += d.count();
}
cout<<"Set Arrival Rate: "<<parameters.arrival_rate<<" requests/second"<<endl;
cout<<"Request Queue Wait Time Avg: "<<request_queue_wait_time_sum/request_queue_wait_time.size()<<" seconds"<<endl;
cout<<"Response Buffer Wait Time Avg: "<<response_buffer_wait_time_sum/response_buffer_wait_time.size()<<" seconds"<<endl;
cout<<"Send Queue Wait Time Avg: "<<send_queue_wait_time_sum/send_queue_wait_time.size()<<" seconds"<<endl;
cout<<"Avg RDMA one sided read time: "<<rdma_one_sided_read_time_sum/rdma_one_sided_read_time.size()<<" seconds"<<endl;
cout<<"Avg RDMA one sided write time: "<<rdma_one_sided_write_time_sum/rdma_one_sided_write_time.size()<<" seconds"<<endl;
cout<<"Client RTT Avg: "<<client_rtt_time_sum/client_rtt_time.size()<<" seconds"<<endl;
}
return 0;
}
\ No newline at end of file
#include <iostream>
#include <string>
#include <chrono>
#include <thread>
#include "transport_config.hpp"
#include "common.hpp"
using namespace std;
/**
 * RDMA client test: connects to 192.168.200.20:8888 with a 512-byte memory
 * region, waits 5 ms and prints the contents of the region.
 *
 * @return 0 on success, -1 if the RDMA setup fails.
 */
int main() {
    int port = 8888;
    RDMA_Transport transport("192.168.200.20", port);
    transport.set_mr_size(512);
    if(transport.rdma_setup()) {
        perror("RDMA setup failed");
        // Do not go on to read the memory region of a failed setup.
        return -1;
    }
    RDMA_config *config = transport.get_config();
    // Short pause before reading the region -- presumably to let the
    // peer's one-sided write land; TODO confirm.
    this_thread::sleep_for(chrono::milliseconds(5));
    cout<<config->mr.mr_buf_addr<<endl;
    return 0;
}
\ No newline at end of file
#include <iostream>
#include "transport_config.hpp"
using namespace std;
int main() {
RDMA_Transport r;
r.rdma_open_dev();
r.rdma_allocate_pd();
r.rdma_register_mr();
r.rdma_init_cq();
r.rdma_init_qp();
return 0;
}
\ No newline at end of file
#include <iostream>
#include <string>
#include "transport_config.hpp"
#include "common.hpp"
using namespace std;
/**
 * RDMA server test: sets up a transport on port 8888 with an empty address
 * and a 512-byte memory region, copies a greeting into the region, prints
 * it and issues a one-sided write.
 *
 * @return 0 on success, -1 if setup, the copy or the write fails.
 */
int main() {
    string buf = "hi from server";
    int port = 8888;
    RDMA_Transport transport("", port);
    transport.set_mr_size(512);
    if(transport.rdma_setup()) {
        perror("RDMA Setup failed");
        return -1;
    }
    if(transport.copy_to_mr(buf.c_str())) {
        perror("Unable to copy to MR");
        return -1;
    }
    RDMA_config *config = transport.get_config();
    cout<<config->mr.mr_buf_addr<<endl;
    if(transport.one_sided_write()) {
        perror("Unable to issue RDMA Write");
        // Report the failure through the exit status like the other steps.
        return -1;
    }
    return 0;
}
\ No newline at end of file
#include <iostream>
#include <string>
#include "transport_config.hpp"
using namespace std;
/**
 * TCP client test: connects to 127.0.0.1:8888, sends two bytes, receives
 * two bytes and prints them.
 */
int main() {
    // Zero-initialised so the two received bytes are NUL-terminated when
    // printed (the buffer used to be uninitialised).
    char buf[100] = {0};
    TCP_Transport t("127.0.0.1", 8888);
    cout<<t.setup()<<endl;
    cout<<"Connected..."<<endl;
    t.send_data("he\0", 2);
    t.recv_data(buf, 2);
    cout<<buf;
    t.close_conn();
    return 0;
}
\ No newline at end of file
#include <iostream>
#include <string>
#include "transport_config.hpp"
using namespace std;
/**
 * TCP server test: sets up a transport on port 8888, receives two bytes,
 * prints them and answers with two bytes.
 */
int main() {
    // Zero-initialised so the two received bytes are NUL-terminated when
    // printed (the buffer used to be uninitialised).
    char buf[100] = {0};
    TCP_Transport t(8888);
    cout<<t.setup()<<endl;
    cout<<"Connected..."<<endl;
    t.recv_data(buf, 2);
    cout<<buf;
    t.send_data("yo\0", 2);
    t.close_conn();
    return 0;
}
\ No newline at end of file
#ifndef __TRANSPORT_CONFIG_CC__
#define __TRANSPORT_CONFIG_CC__
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <string>
#include <chrono>
#include <infiniband/verbs.h>
#include "../include/common.hpp"
#include "transport_config.hpp"
namespace chrono = std::chrono;
// Default constructor: owns a default-constructed TCP_config and starts
// without a message buffer.
TCP_Transport::TCP_Transport() {
    config = new TCP_config();
    mr = NULL;
    mr_size = 0;
}
// Builds the owned TCP_config from (a, b); other code in this project passes
// an IP address string and a port. Starts without a message buffer.
TCP_Transport::TCP_Transport(std::string a, int b) {
    config = new TCP_config(a, b);
    mr = NULL;
    mr_size = 0;
}
// Builds the owned TCP_config from b alone; other code in this project
// passes a port number. Starts without a message buffer.
TCP_Transport::TCP_Transport(int b) {
    config = new TCP_config(b);
    mr = NULL;
    mr_size = 0;
}
// Builds the owned TCP_config as a copy of t. Starts without a message buffer.
TCP_Transport::TCP_Transport(TCP_config& t) {
    config = new TCP_config(t);
    mr = NULL;
    mr_size = 0;
}
// Destructor: deletes the owned config and, if a message buffer is set,
// releases it with free().
TCP_Transport::~TCP_Transport() {
    delete config;
    config = NULL;

    if(mr != NULL) {
        free(mr);
        mr = NULL;
    }
}
std::string TCP_Transport::get_ip() {
return this->config->ip;
}
int TCP_Transport::get_port() {
return this->config->port;
}
void TCP_Transport::set_ip(std::string addr) {
this->config->ip = addr;
return;
}
void TCP_Transport::set_ip(char* addr) {
this->config->ip = std::string(addr);
return;
}
void TCP_Transport::set_port(int port) {
this->config->port = port;
return;
}
void TCP_Transport::set_mr(char* buf_addr, size_t buf_size) {
// if(this->mr!=NULL) {
// free(this->mr);
// this->mr = NULL;
// this->mr_size = 0;
// }
this->mr = buf_addr;
this->mr_size = buf_size;
}
// Sets the message buffer to a freshly malloc()ed copy of str.
//
// The new buffer is buf_size bytes long. At most str.size() bytes are copied
// from str and any remaining bytes are zero, so asking for more bytes than
// the string holds no longer reads past the end of the string.
// If the allocation fails the transport is left without a buffer
// (mr == NULL, mr_size == 0).
// A previously set buffer is not released here (same as before).
void TCP_Transport::set_mr(std::string str, size_t buf_size) {
    // malloc(0) may legally return NULL; always request at least one byte.
    char *temp = (char*) malloc(buf_size > 0 ? buf_size : 1);
    if(temp == NULL) {
        perror("TCP_Transport::set_mr: allocation failed");
        this->mr = NULL;
        this->mr_size = 0;
        return;
    }
    const size_t copy_len = (str.size() < buf_size) ? str.size() : buf_size;
    memset(temp, 0, buf_size);
    memcpy(temp, str.data(), copy_len);
    this->mr = temp;
    this->mr_size = buf_size;
}
// Creates the local IPv4 stream socket and stores it in
// config->local_sock_fd. When no ip is configured (server role) the socket is
// also bound to INADDR_ANY on the configured port.
//
// Returns 0 on success, -1 if the port is negative or socket()/bind() fails.
// On bind failure the socket is closed and local_sock_fd is reset to -1 so
// the stale descriptor is not closed a second time later on.
int TCP_Transport::make_socket() {
    TCP_config *config = this->config;
    if(config->port < 0) {
        //error
        return -1;
    }
    //setup the socket
    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(config->port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);
    config->local_sock_fd = socket(host_addr.sin_family,
                                   SOCK_STREAM,
                                   0);
    if(config->local_sock_fd < 0) {
        //error
        return -1;
    }
    if(config->ip.empty()) {
        //server
        if(bind(config->local_sock_fd,
                (struct sockaddr*)&host_addr,
                sizeof(host_addr)))
        {
            //error
            close(config->local_sock_fd);
            config->local_sock_fd = -1;
            return -1;
        }
    }
    return 0;
}
// Puts the already-created local socket into listening mode (backlog 64).
// Returns 0 on success, -1 if no socket exists or listen() fails.
int TCP_Transport::start_listen() {
    const int sock = this->config->local_sock_fd;
    if(sock < 0) {
        perror("start_listen: No socket created");
        return -1;
    }
    const int rc = listen(sock, 64);
    if(rc < 0) {
        perror("start_listen: listen error");
        return -1;
    }
    return 0;
}
// Creates the local socket via make_socket() and then starts listening on it
// with a backlog of 64.
// Returns 0 on success, -1 if either step fails.
int TCP_Transport::make_socket_listen() {
    const int made = this->make_socket();
    if(made != 0) {
        perror("make_socket_listen: Unable to create socket");
        return -1;
    }
    const int sock = this->config->local_sock_fd;
    if(listen(sock, 64) < 0) {
        perror("make_socket_listen: Listen error");
        return -1;
    }
    return 0;
}
// accept new conns
int TCP_Transport::accept_conn() {
listen(config->local_sock_fd, 1);
config->conn_fd = accept(config->local_sock_fd, NULL, 0);
}
// Connects the existing local socket to the configured ip/port. On success
// conn_fd is set to the same descriptor as local_sock_fd.
//
// Returns 0 on success, -1 if the ip is not a valid IPv4 address or connect()
// fails. On connect failure the socket is closed and local_sock_fd is reset
// to -1 so the stale descriptor cannot be closed a second time.
int TCP_Transport::make_conn() {
    TCP_config *config = this->config;
    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(config->port);
    // inet_aton() returns 0 for an unparsable address; previously that case
    // silently fell through with the address left at INADDR_ANY.
    if(inet_aton(config->ip.c_str(), &host_addr.sin_addr) == 0) {
        //error
        return -1;
    }
    if(connect(config->local_sock_fd,
               (struct sockaddr*)&host_addr,
               sizeof(host_addr)) < 0)
    {
        //error
        close(config->local_sock_fd);
        config->local_sock_fd = -1;
        return -1;
    }
    config->conn_fd = config->local_sock_fd;
    return 0;
}
// One-shot TCP bring-up.
//  - No ip configured (server): create a socket, bind it to INADDR_ANY on the
//    configured port, listen with a backlog of 1 and block in accept() until
//    one peer connects; the accepted descriptor is stored in conn_fd.
//  - ip configured (client): create a socket and connect to ip:port; conn_fd
//    is then the same descriptor as local_sock_fd.
//
// Returns 0 on success, -1 on any failure. Every failure after the socket
// was created closes it and resets local_sock_fd to -1. Unlike before,
// failures of listen(), accept() and inet_aton() are reported instead of
// being ignored (an accept() failure used to return 0 with conn_fd == -1).
int TCP_Transport::setup() {
    TCP_config *config = this->config;
    //sanity check
    if(config->port < 0) {
        perror("TCP_Transport::setup: Port not set");
        return -1;
    }
    //setup tcp
    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(config->port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);
    config->local_sock_fd = socket(host_addr.sin_family,
                                   SOCK_STREAM,
                                   0);
    if(config->local_sock_fd < 0) {
        perror("TCP_Transport::setup: unable to get socket");
        return -1;
    }
    if(config->ip.empty()) {
        //this is the server
        if(bind(config->local_sock_fd,
                (struct sockaddr*)&host_addr,
                sizeof(host_addr))) {
            perror("TCP_Transport::setup:bind: unable to bind");
            close(config->local_sock_fd);
            config->local_sock_fd = -1;
            return -1;
        }
        if(listen(config->local_sock_fd, 1) < 0) {
            perror("TCP_Transport::setup:listen: unable to listen");
            close(config->local_sock_fd);
            config->local_sock_fd = -1;
            return -1;
        }
        config->conn_fd = accept(config->local_sock_fd, NULL, NULL);
        if(config->conn_fd < 0) {
            perror("TCP_Transport::setup:accept: unable to accept");
            close(config->local_sock_fd);
            config->local_sock_fd = -1;
            return -1;
        }
        return 0;
    }
    //this is the client
    if(inet_aton(config->ip.c_str(), &host_addr.sin_addr) == 0) {
        // inet_aton() does not set errno, so report without perror()
        fprintf(stderr, "TCP_Transport::setup: invalid ip address\n");
        close(config->local_sock_fd);
        config->local_sock_fd = -1;
        return -1;
    }
    if(connect(config->local_sock_fd,
               (struct sockaddr*)&host_addr,
               sizeof(host_addr)) < 0) {
        perror("TCP_Transport::setup:connect: unable to connect");
        close(config->local_sock_fd);
        config->local_sock_fd = -1;
        return -1;
    }
    config->conn_fd = config->local_sock_fd;
    return 0;
}
// Sends the first `size` bytes of `str` over conn_fd, looping until every
// byte has been handed to send().
//
// Returns 0 once everything was sent and -1 on a send() error or when `size`
// is negative (a negative size used to wrap around to a huge unsigned count).
// The result of send() is now kept in a signed ssize_t instead of size_t.
// NOTE(review): `size` is not checked against str.size(); callers must not
// ask for more bytes than the string (plus its terminator) holds.
int TCP_Transport::send_data(std::string str, int size) {
    if(size < 0) {
        //error
        return -1;
    }
    const char* buf = str.c_str();
    size_t remaining = (size_t) size;
    size_t offset = 0;
    while(remaining > 0) {
        const ssize_t sent = send(this->config->conn_fd,
                                  (const void*)(buf + offset),
                                  remaining,
                                  0);
        if(sent < 0) {
            //error
            return -1;
        }
        remaining -= (size_t) sent;
        offset += (size_t) sent;
    }
    return 0;
}
// Sends exactly `size` bytes starting at `buf` over conn_fd, looping until
// every byte has been handed to send().
//
// Returns 0 once everything was sent and -1 on a send() error, when `size` is
// negative (it used to wrap around to a huge unsigned count) or when `buf` is
// NULL while bytes were requested. The result of send() is now kept in a
// signed ssize_t instead of size_t.
int TCP_Transport::send_data(char* buf, int size) {
    if(size < 0 || (buf == NULL && size > 0)) {
        //error
        return -1;
    }
    size_t remaining = (size_t) size;
    size_t offset = 0;
    while(remaining > 0) {
        const ssize_t sent = send(this->config->conn_fd,
                                  (void*)(buf + offset),
                                  remaining,
                                  0);
        if(sent < 0) {
            //error
            return -1;
        }
        remaining -= (size_t) sent;
        offset += (size_t) sent;
    }
    return 0;
}
// Sends the buffer previously staged with set_mr() and then detaches it.
// The buffer itself is not freed here; only the pointer and size are cleared.
// Returns -1 when nothing is staged, otherwise the result of
// send_data(char*, int).
int TCP_Transport::send_data() {
    const bool nothing_staged = (this->mr == NULL) || (this->mr_size == 0);
    if(nothing_staged) {
        return -1;
    }
    const int result = this->send_data(this->mr, this->mr_size);
    this->mr_size = 0;
    this->mr = NULL;
    return result;
}
// Receives exactly `size` bytes from conn_fd into `buf`, looping over short
// reads.
//
// Returns 0 when all bytes arrived, and also 0 when the peer closed the
// connection before that (unchanged behaviour - the two cases cannot be told
// apart from the return value). Returns -1 on a recv() error, when `size` is
// negative (it used to wrap around to a huge unsigned count) or when `buf` is
// NULL while bytes were requested. The result of recv() is now kept in a
// signed ssize_t instead of size_t.
int TCP_Transport::recv_data(char* buf, int size) {
    if(size < 0 || (buf == NULL && size > 0)) {
        //error
        return -1;
    }
    size_t remaining = (size_t) size;
    size_t offset = 0;
    while(remaining > 0) {
        const ssize_t got = recv(this->config->conn_fd,
                                 (void*)(buf + offset),
                                 remaining,
                                 0);
        if(got == 0) {
            // peer performed an orderly shutdown
            return 0;
        }
        if(got < 0) {
            //error
            return -1;
        }
        remaining -= (size_t) got;
        offset += (size_t) got;
    }
    return 0;
}
// Receives one message of unknown length into a newly allocated buffer.
//
// `*null_buffer` must be NULL on entry. On return it points to a zero-filled
// malloc()ed buffer of max_packet_size_bytes bytes that the caller must
// free(). Data is read in chunks of up to 1025 bytes; reading stops at the
// first chunk that comes back short (which includes a 0-byte read when the
// peer closed) or when the buffer is full.
//
// Returns the number of bytes received (0 when the peer closed without
// sending), 0 without touching the buffer when `*null_buffer` was not NULL,
// and -1 on allocation or recv() failure. On -1 the buffer is released and
// `*null_buffer` is reset to NULL, so nothing leaks.
//
// Fixes over the previous version: reads are bounded by the remaining buffer
// capacity (a long message could overrun the allocation), the buffer is no
// longer leaked on error, the malloc() result is checked, and the loop
// condition no longer mixes bitwise `&` with a signed/unsigned comparison.
ssize_t TCP_Transport::recv_data(char** null_buffer) {
    if(null_buffer == NULL) {
        return -1;
    }
    //null_buffer should be NULL
    if(*null_buffer != NULL) {
        printf("Buffer passed to recv_data must be NULL\n");
        return 0;
    }
    const size_t capacity = (size_t) max_packet_size_bytes;
    const size_t chunk = 1025;
    *null_buffer = (char *) malloc(capacity);
    if(*null_buffer == NULL) {
        //error
        return -1;
    }
    memset(*null_buffer, 0, capacity);
    size_t total = 0;
    while(total < capacity) {
        const size_t room = capacity - total;
        const size_t want = (room < chunk) ? room : chunk;
        const ssize_t got = recv(this->config->conn_fd,
                                 (void*)(*null_buffer + total),
                                 want,
                                 0);
        if(got < 0) {
            //error
            free(*null_buffer);
            *null_buffer = NULL;
            return -1;
        }
        total += (size_t) got;
        if((size_t) got < chunk) {
            // short read or peer closed: treat the message as complete
            break;
        }
    }
    return (ssize_t) total;
}
// Sends `send_size` bytes from `send_buf`, then receives `recv_size` bytes
// into `recv_buf`. Returns 0 when both calls returned 0, otherwise -1.
int TCP_Transport::send_and_recv(char* send_buf, int send_size, char* recv_buf, int recv_size) {
    const int send_rc = this->send_data(send_buf, send_size);
    if(send_rc != 0) {
        return -1;
    }
    const int recv_rc = this->recv_data(recv_buf, recv_size);
    if(recv_rc != 0) {
        return -1;
    }
    return 0;
}
void TCP_Transport::set_local_fd(int fd) {
this->config->local_sock_fd = fd;
}
void TCP_Transport::set_conn_fd(int fd) {
this->config->conn_fd = fd;
}
int TCP_Transport::get_local_fd() {
return this->config->local_sock_fd;
}
int TCP_Transport::get_conn_fd() {
return this->config->conn_fd;
}
int TCP_Transport::close_conn() {
if(this->config->conn_fd!=-1)
close(this->config->conn_fd);
if(this->config->local_sock_fd!=-1)
close(this->config->local_sock_fd);
return 0;
}
//debug functions
// Debug accessor: exposes the RDMA_config owned by this transport.
RDMA_config* RDMA_Transport::get_config() {
    RDMA_config* cfg = this->rdma_config;
    return cfg;
}
void RDMA_Transport::fill_default_parameters() {
this->rdma_config->dev.ctx = NULL;
this->rdma_config->pd = NULL;
this->rdma_config->_cq->cq = NULL;
this->rdma_config->_qp->qp = NULL;
this->rdma_config->mr.mr = NULL;
this->rdma_config->mr.mr_buf_addr = NULL;
this->rdma_config->mr.mr_size = ((1<<9)+20);
this->rdma_config->mtu = IBV_MTU_512;
this->rdma_config->min_rnr_timer = 12;
this->rdma_config->timeout = 12;
this->rdma_config->retry_cnt = 4;
this->rdma_config->ib_port = 1;
this->rdma_config->gid_idx = 1;
}
//required functions
// Default constructor: allocates a fresh RDMA_config and a default
// TCP_Transport (no ip, no port), then applies the default RDMA parameters.
RDMA_Transport::RDMA_Transport() {
    rdma_config = new RDMA_config();
    tcp_transport = new TCP_Transport();
    fill_default_parameters();
}
// Allocates a fresh RDMA_config plus a TCP_Transport constructed from
// (a, b), then applies the default RDMA parameters.
RDMA_Transport::RDMA_Transport(std::string a, int b) {
    rdma_config = new RDMA_config();
    tcp_transport = new TCP_Transport(a, b);
    fill_default_parameters();
}
// Wraps an existing TCP transport. The pointer is adopted: the destructor of
// this object deletes it. A fresh RDMA_config is allocated and filled with
// the default RDMA parameters.
RDMA_Transport::RDMA_Transport(TCP_Transport *tcp_conn) {
    rdma_config = new RDMA_config();
    tcp_transport = tcp_conn;
    fill_default_parameters();
}
// Deletes the TCP transport first and the RDMA configuration second, clearing
// both pointers. A transport handed in through the TCP_Transport* constructor
// is deleted here too.
RDMA_Transport::~RDMA_Transport() {
    delete tcp_transport;
    tcp_transport = NULL;
    delete rdma_config;
    rdma_config = NULL;
}
void RDMA_Transport::set_conn_ip(std::string ip) {
this->tcp_transport->set_ip(ip);
}
void RDMA_Transport::set_tcp_port(int port) {
this->tcp_transport->set_port(port);
}
// Opens the RDMA device selected by rdma_config->dev.dev_num and stores its
// context in rdma_config->dev.ctx.
//
// Returns 0 on success, -1 if the device list cannot be obtained, dev_num is
// outside the list, or the device cannot be opened.
//
// The device list is now released on every path (it used to leak when
// ibv_open_device() failed), and dev_num is range-checked so an empty list
// or a bad index no longer reads past the end of the array.
int RDMA_Transport::rdma_open_dev() {
    int num_devs = 0;
    struct ibv_device **dev_list = ibv_get_device_list(&num_devs);
    if(dev_list == NULL) {
        //error
        return -1;
    }
    const int wanted = this->rdma_config->dev.dev_num;
    if(wanted < 0 || wanted >= num_devs) {
        //error: no such device
        ibv_free_device_list(dev_list);
        return -1;
    }
    this->rdma_config->dev.ctx = ibv_open_device(dev_list[wanted]);
    // the list is not needed once the device has been opened (or failed to)
    ibv_free_device_list(dev_list);
    if(this->rdma_config->dev.ctx == NULL) {
        //error
        return -1;
    }
    return 0;
}
int RDMA_Transport::rdma_allocate_pd() {
if(this->rdma_config->dev.ctx==NULL) {
//error
return -1;
}
this->rdma_config->pd = ibv_alloc_pd(this->rdma_config->dev.ctx);
if(this->rdma_config->pd == NULL) {
//error
return -1;
}
return 0;
}
void RDMA_Transport::set_mr_size(size_t size) {
this->rdma_config->mr.mr_size = size;
}
int RDMA_Transport::rdma_register_mr() {
if(this->rdma_config->pd==NULL) {
//error
return -1;
}
this->rdma_config->mr.mr_buf_addr = (char*) malloc(this->rdma_config->mr.mr_size);
memset(this->rdma_config->mr.mr_buf_addr, 0, this->rdma_config->mr.mr_size);
this->rdma_config->mr.mr = ibv_reg_mr(this->rdma_config->pd,
(void*)this->rdma_config->mr.mr_buf_addr,
this->rdma_config->mr.mr_size,
this->rdma_config->mr.mr_flags);
if(this->rdma_config->mr.mr == NULL) {
//error
return -1;
}
return 0;
}
int RDMA_Transport::rdma_init_cq() {
if(this->rdma_config->dev.ctx == NULL) {
//error
return -1;
}
if(ibv_query_device(this->rdma_config->dev.ctx, this->rdma_config->dev.dev_attr)) {
//error
return -1;
}
this->rdma_config->_cq->cq = ibv_create_cq(this->rdma_config->dev.ctx,
this->rdma_config->_cq->depth,
this->rdma_config->_cq->cq_context,
this->rdma_config->_cq->channel,
this->rdma_config->_cq->comp_vector);
if(this->rdma_config->_cq->cq==NULL) {
//error
perror("Unable to create CQ");
return -1;
}
this->rdma_config->_qp->qp_init_attr.send_cq = this->rdma_config->_cq->cq;
this->rdma_config->_qp->qp_init_attr.recv_cq = this->rdma_config->_cq->cq;
return 0;
}
int RDMA_Transport::rdma_init_qp() {
if(this->rdma_config->pd==NULL) {
//error
perror("PD not created");
return -1;
}
if(this->rdma_config->_cq->cq == NULL) {
//error
perror("CQ not created");
return -1;
}
struct qp_config *config = this->rdma_config->_qp;
// struct ibv_qp_init_attr qp_init_attr;
// memset(&qp_init_attr, 0, sizeof(qp_init_attr));
config->qp_init_attr.sq_sig_all = 1;
config->qp_init_attr.send_cq = this->rdma_config->_cq->cq;
config->qp_init_attr.recv_cq = this->rdma_config->_cq->cq;
config->qp_init_attr.qp_type = IBV_QPT_RC; //default for now
config->qp_init_attr.cap = {
.max_send_wr = 3,
.max_recv_wr = 3,
.max_send_sge = 1,
.max_recv_sge = 1
};
this->rdma_config->_qp->qp = ibv_create_qp(this->rdma_config->pd,
&config->qp_init_attr);
if(this->rdma_config->_qp->qp==NULL) {
//error
perror("Unable to create qp");
return -1;
}
return 0;
}
int RDMA_Transport::rdma_query_port() {
if(this->rdma_config->dev.ctx==NULL) {
//error
perror("rdma_query_port: Device context not set");
return -1;
}
if(this->rdma_config->ib_port<0) {
//error
perror("rdma_query_port: IB PORT not set");
return -1;
}
if(this->rdma_config->gid_idx<0) {
//error
perror("rdma_query_port: GID INDEX not set");
return -1;
}
if(this->rdma_config->dev.port_attr == NULL) {
this->rdma_config->dev.port_attr = new ibv_port_attr();
}
if(ibv_query_port(this->rdma_config->dev.ctx,
this->rdma_config->ib_port,
this->rdma_config->dev.port_attr)) {
//error
perror("rdma_query_port: Unable to query port");
return -1;
}
return 0;
}
bool RDMA_Transport::rdma_port_isactive() {
this->rdma_query_port();
if(this->rdma_config->dev.port_attr->state == IBV_PORT_ACTIVE) {
return true;
}
return false;
}
int RDMA_Transport::qp_state_to_reset() {
int flags = this->rdma_config->mr.mr_flags;
if(ibv_rereg_mr(this->rdma_config->mr.mr,
flags,
this->rdma_config->pd,
this->rdma_config->mr.mr_buf_addr,
this->rdma_config->mr.mr_size,
flags)) {
//error
return -1;
}
flags = 0;
struct ibv_qp_attr reset_attr;
memset(&reset_attr, 0, sizeof(reset_attr));
reset_attr.qp_state = IBV_QPS_RESET;
reset_attr.cap = {
.max_send_wr = 2,
.max_recv_wr = 2,
.max_send_sge = 1,
.max_recv_sge = 1
};
flags = IBV_QP_STATE | IBV_QP_CAP;
if(ibv_modify_qp(this->rdma_config->_qp->qp, &reset_attr, flags)) {
//error
return -1;
}
return 0;
}
int RDMA_Transport::qp_state_to_init() {
RDMA_config *config = this->rdma_config;
struct ibv_qp_attr init_attr;
memset(&init_attr, 0, sizeof(init_attr));
init_attr.qp_state = IBV_QPS_INIT;
init_attr.pkey_index = 0; //need to read more
init_attr.port_num = config->ib_port;
init_attr.qp_access_flags = config->_qp->qp_access_flags;
int flags = IBV_QP_STATE |
IBV_QP_PKEY_INDEX |
IBV_QP_PORT |
IBV_QP_ACCESS_FLAGS;
if(ibv_modify_qp(config->_qp->qp, &init_attr, flags)) {
//error
perror("qp_state_to_init: Unable to modify qp to init");
return -1;
}
return 0;
}
int RDMA_Transport::qp_state_to_rtr() {
RDMA_config* config = this->rdma_config;
struct ibv_qp_attr rtr_attr;
memset(&rtr_attr, 0, sizeof(rtr_attr));
rtr_attr.qp_state = IBV_QPS_RTR;
rtr_attr.path_mtu = config->mtu;
rtr_attr.dest_qp_num = config->remote_conn.qp_num;
rtr_attr.rq_psn = 0; //need to read more
rtr_attr.max_dest_rd_atomic = 1; //need to read more
rtr_attr.min_rnr_timer = config->min_rnr_timer;
rtr_attr.ah_attr.is_global = 0; //need to read more
rtr_attr.ah_attr.dlid = config->remote_conn.lid;
rtr_attr.ah_attr.sl = 0; //need to read more
rtr_attr.ah_attr.src_path_bits = 0; //need to read more
rtr_attr.ah_attr.port_num = config->ib_port;
if(config->gid_idx >= 0) {
rtr_attr.ah_attr.is_global = 1; //need to read more
memcpy(&rtr_attr.ah_attr.grh.dgid, config->remote_conn.gid, 16);
rtr_attr.ah_attr.grh.flow_label = 0; //need to read more
rtr_attr.ah_attr.grh.hop_limit = 1; //need to read more
rtr_attr.ah_attr.grh.sgid_index = config->gid_idx;
rtr_attr.ah_attr.grh.traffic_class = 0; //need to read more
}
int flags = IBV_QP_STATE |
IBV_QP_AV |
IBV_QP_PATH_MTU |
IBV_QP_DEST_QPN |
IBV_QP_RQ_PSN |
IBV_QP_MAX_DEST_RD_ATOMIC |
IBV_QP_MIN_RNR_TIMER;
if(ibv_modify_qp(config->_qp->qp, &rtr_attr, flags)) {
//error
perror("qp_state_to_rtr: Unable to modify qp to rtr");
return -1;
}
return 0;
}
// Moves the queue pair to the RTS (ready to send) state with the configured
// timeout and retry count, no RNR retries, and send PSN 0.
//
// Returns 0 on success, -1 if ibv_modify_qp() fails.
//
// Fix: the IBV_QP_MAX_QP_RD_ATOMIC mask bit selects the `max_rd_atomic`
// field (outstanding RDMA reads/atomics this QP may initiate). The code used
// to fill `max_dest_rd_atomic`, which this mask ignores, so the QP was
// configured with max_rd_atomic == 0.
int RDMA_Transport::qp_state_to_rts() {
    RDMA_config* config = this->rdma_config;
    struct ibv_qp_attr rts_attr;
    memset(&rts_attr, 0, sizeof(rts_attr));
    rts_attr.qp_state = IBV_QPS_RTS;
    rts_attr.timeout = config->timeout;
    rts_attr.retry_cnt = config->retry_cnt;
    rts_attr.rnr_retry = 0; //need to read more
    rts_attr.sq_psn = 0; //need to read more
    rts_attr.max_rd_atomic = 1;
    int flags = IBV_QP_STATE |
                IBV_QP_TIMEOUT |
                IBV_QP_RETRY_CNT |
                IBV_QP_RNR_RETRY |
                IBV_QP_SQ_PSN |
                IBV_QP_MAX_QP_RD_ATOMIC;
    if(ibv_modify_qp(config->_qp->qp, &rts_attr, flags)) {
        //error
        perror("qp_state_to_rts: Unable to modify qp to rts");
        return -1;
    }
    return 0;
}
// Placeholder for moving the queue pair to the SQD state. Not implemented
// yet: performs no verbs call and always reports success (0).
int RDMA_Transport::qp_state_to_sqd() {
    const int not_implemented_yet = 0;
    return not_implemented_yet;
}
// Placeholder for moving the queue pair to the error state. Not implemented
// yet: performs no verbs call and always reports success (0).
int RDMA_Transport::qp_state_to_error() {
    const int not_implemented_yet = 0;
    return not_implemented_yet;
}
int RDMA_Transport::qp_from_reset_to_rts() {
if(this->qp_state_to_init()) {
// error
perror("qp_from_reset_to_rts: Unable to move qp to init");
return -1;
}
if(this->qp_state_to_rtr()) {
// error
perror("qp_from_reset_to_rts: Unable to move qp to rtr");
return -1;
}
if(this->qp_state_to_rts()) {
// error
perror("qp_from_reset_to_rts: Unable to move qp to rts");
return -1;
}
return 0;
}
// Queries the queue pair for the attributes selected by `mask` and stores the
// result in _qp->qp_attr and _qp->qp_init_attr.
//
// Returns 0 on success, otherwise the non-zero result of ibv_query_qp().
// (The success path used to fall off the end of the function without a
// return statement, which is undefined behaviour.)
int RDMA_Transport::rdma_query_qp(enum ibv_qp_attr_mask mask) {
    qp_config* config = this->rdma_config->_qp;
    const int t = ibv_query_qp(config->qp,
                               &config->qp_attr,
                               mask,
                               &config->qp_init_attr);
    if(t) {
        //error
        perror("rdma_query_qp: Unable to query qp");
        return t;
    }
    return 0;
}
// Queries the queue pair and returns its current state.
//
// Fix: a query with IBV_QP_STATE reports the state in `qp_attr.qp_state`.
// The previously returned `cur_qp_state` field is an input hint for
// ibv_modify_qp() and is not filled in by ibv_query_qp().
// NOTE(review): a failed query is not detected here; the value returned is
// then whatever qp_attr.qp_state held before the call.
enum ibv_qp_state RDMA_Transport::get_curr_qp_state() {
    this->rdma_query_qp(IBV_QP_STATE);
    return (this->rdma_config->_qp->qp_attr.qp_state);
}
int RDMA_Transport::one_sided_read() {
auto start_time = chrono::steady_clock::now();
auto end_time = chrono::steady_clock::now();
if(analyze) {
start_time = chrono::steady_clock::now();
}
//IBV_WR_RDMA_READ
struct ibv_send_wr sr;
struct ibv_sge sge;
struct ibv_send_wr *bad_wr;
memset(&sge, 0, sizeof(sge));
sge.addr = (uintptr_t) this->rdma_config->mr.mr_buf_addr;
sge.length = this->rdma_config->mr.mr_size;
sge.lkey = this->rdma_config->mr.mr->lkey;
memset(&sr, 0, sizeof(sr));
sr.next = NULL;
sr.wr_id = 0; //need to be dynamic
sr.sg_list = &sge;
sr.num_sge = 1; //need to read more
sr.opcode = IBV_WR_RDMA_READ;
sr.send_flags = IBV_SEND_SIGNALED; //placeholder to send wr completion events
// should be dynamic
sr.wr.rdma.remote_addr = this->rdma_config->remote_conn.addr;
sr.wr.rdma.rkey = this->rdma_config->remote_conn.rkey;
if(ibv_post_send(this->rdma_config->_qp->qp, &sr, &bad_wr)) {
//error
return -1;
}
int poll_res;
poll_res = this->poll_cq();
if(poll_res == 0) {
perror("CQ err: empty CQ\n");
return -1;
}
if(poll_res == -1) {
perror("CQ err: bad status\n");
return -1;
}
if(analyze) {
end_time = chrono::steady_clock::now();
rdma_one_sided_read_time.push_back((end_time-start_time));
}
return 0;
}
int RDMA_Transport::one_sided_write() {
auto start_time = chrono::steady_clock::now();
auto end_time = chrono::steady_clock::now();
if(analyze) {
start_time = chrono::steady_clock::now();
}
//IBV_WR_RDMA_WRITE
struct ibv_send_wr sr;
struct ibv_sge sge;
struct ibv_send_wr *bad_wr;
memset(&sge, 0, sizeof(sge));
sge.addr = (uintptr_t) this->rdma_config->mr.mr_buf_addr;
sge.length = this->rdma_config->mr.mr_size;
sge.lkey = this->rdma_config->mr.mr->lkey;
memset(&sr, 0, sizeof(sr));
sr.next = NULL;
sr.wr_id = 0; //need to be dynamic
sr.sg_list = &sge;
sr.num_sge = 1; //need to read more
sr.opcode = IBV_WR_RDMA_WRITE;
sr.send_flags = IBV_SEND_SIGNALED; //placeholder to send wr completion events
// should be dynamic
sr.wr.rdma.remote_addr = this->rdma_config->remote_conn.addr;
sr.wr.rdma.rkey = this->rdma_config->remote_conn.rkey;
if(ibv_post_send(this->rdma_config->_qp->qp, &sr, &bad_wr)) {
//error
return -1;
}
int poll_res;
poll_res = this->poll_cq();
if(poll_res == 0) {
perror("CQ err: empty CQ\n");
return -1;
}
if(poll_res == -1) {
perror("CQ err: bad status\n");
return -1;
}
if(analyze) {
end_time = chrono::steady_clock::now();
rdma_one_sided_write_time.push_back((end_time-start_time));
}
return 0;
}
int RDMA_Transport::two_sided_send() {
//IBV_WR_SEND
struct ibv_send_wr sr;
struct ibv_sge sge;
struct ibv_send_wr *bad_wr;
memset(&sge, 0, sizeof(sge));
sge.addr = (uintptr_t) this->rdma_config->mr.mr_buf_addr;
sge.length = this->rdma_config->mr.mr_size;
sge.lkey = this->rdma_config->mr.mr->lkey;
memset(&sr, 0, sizeof(sr));
sr.next = NULL;
sr.wr_id = 0; //need to be dynamic
sr.sg_list = &sge;
sr.num_sge = 1; //need to read more
sr.opcode = IBV_WR_SEND;
sr.send_flags = IBV_SEND_SIGNALED; //placeholder to send wr completion events
// should be dynamic
if(ibv_post_send(this->rdma_config->_qp->qp, &sr, &bad_wr)) {
//error
return -1;
}
return 0;
}
int RDMA_Transport::two_sided_recv() {
//RR
struct ibv_recv_wr rr;
struct ibv_sge sge;
struct ibv_recv_wr *bad_wr;
memset(&sge, 0, sizeof(sge));
sge.addr = (uintptr_t) this->rdma_config->mr.mr_buf_addr;
sge.length = this->rdma_config->mr.mr_size;
sge.lkey = this->rdma_config->mr.mr->lkey;
memset(&rr, 0, sizeof(rr));
rr.next = NULL;
rr.wr_id = 0; //need to ebe dynamic
rr.sg_list = &sge;
rr.num_sge = 1; //need to read more
if(ibv_post_recv(this->rdma_config->_qp->qp, &rr, &bad_wr)) {
//error
return -1;
}
return 0;
}
int RDMA_Transport::poll_cq() {
RDMA_config *config = this->rdma_config;
struct ibv_wc wc;
int poll_result = -1;
auto start = chrono::steady_clock::now();
auto end = chrono::steady_clock::now();
chrono::duration<double> elapsed_time;
do {
poll_result = ibv_poll_cq(config->_cq->cq, 1, &wc); //need to think about the num_entries argument
end = chrono::steady_clock::now();
elapsed_time = (end - start);
} while(poll_result == 0 &&
(elapsed_time)<max_cq_poll_timeout);
if(poll_result < 0) {
//error
return -1;
}
else if(poll_result == 0) {
//empty cq
//some distinction needed between error and empty cq
return 0;
}
else {
if(wc.status != IBV_WC_SUCCESS) {
printf("Bad status while polling CQ: %d err\n", wc.status);
return -1;
}
}
return 1;
}
// Smoke test for the two-sided path. WARNING: overwrites the memory region.
//
// The side without a configured ip (server) writes "hello" into the region
// and posts a SEND; the other side (client) clears the region and posts a
// RECV. Both then poll the completion queue once.
//
// Returns 1 when a successful completion was polled, 0 when the completion
// queue stayed empty, and -1 on error.
//
// Changes: the test string is no longer a string literal bound to a
// non-const `char*`, a failure to post the work request is reported instead
// of being ignored, and a missing or too-small memory region is rejected
// before it is written to.
int RDMA_Transport::check_rdma_conn() {
    static const char dummy_data[] = "hello";
    const size_t dummy_len = 5;
    char *mr_buf = this->rdma_config->mr.mr_buf_addr;
    const size_t mr_size = this->rdma_config->mr.mr_size;
    if(mr_buf == NULL || mr_size < dummy_len) {
        //error: memory region not registered or too small
        return -1;
    }
    const bool is_server = this->tcp_transport->get_ip().empty();
    memset(mr_buf, 0, mr_size);
    int posted;
    if(is_server) {
        //so this machine posts send two sided verb
        memcpy(mr_buf, dummy_data, dummy_len);
        posted = this->two_sided_send();
    }
    else {
        posted = this->two_sided_recv();
    }
    if(posted != 0) {
        //error: work request could not be posted
        return -1;
    }
    //poll cq
    const int t = this->poll_cq();
    if(t == -1) {
        //error
        return -1;
    }
    if(t == 0) {
        //cq empty
        return 0;
    }
    return 1;
}
// Smoke test for one-sided RDMA READ. WARNING: overwrites the memory region.
// The peer must run this function too.
//
// Client (ip configured): writes its greeting into its region, notifies the
// server over TCP, waits for the server's TCP message and then RDMA-reads
// the server's region.
// Server (no ip): waits for the client's TCP message, RDMA-reads the
// client's region, then writes its own greeting into its region and
// notifies the client over TCP.
//
// Returns 0 on success, -1 on any failure.
//
// Changes: the greetings are char arrays instead of string literals bound to
// non-const `char*`; failures of the TCP send/receive steps are reported
// instead of being ignored; the region must be able to hold the 20-byte
// greeting.
int RDMA_Transport::check_rdma_onesided() {
    //warning erases mr data
    TCP_Transport *tcp_transport = this->get_tcp_conn();
    char *mr_addr = this->rdma_config->mr.mr_buf_addr;
    char dummy_data1[] = "hello this is server"; //20
    char dummy_data2[] = "hello this is client"; //20
    const int dummy_len = 20;
    char *buf = NULL;
    printf("========================== RDMA ONESIDED TRANSPORTS TEST ==========================\n");
    if(mr_addr == NULL || this->rdma_config->mr.mr_size == 0) {
        //error
        perror("check_rdma_onesided: MR not set");
        return -1;
    }
    if(this->rdma_config->mr.mr_size < (size_t) dummy_len) {
        //error
        fprintf(stderr, "check_rdma_onesided: MR too small for test data\n");
        return -1;
    }
    if(tcp_transport->get_ip().empty()) {
        //this is the server
        //server will not initiate comm
        memset(mr_addr, 0, this->get_mr_size());
        if(tcp_transport->recv_data(&buf) < 0) {
            perror("check_rdma_onesided: tcp recv fail");
            free(buf);
            return -1;
        }
        printf("TCP Request for one-sided read received by server\n");
        free(buf);
        buf = NULL;
        printf("Initiating RDMA one sided read\n");
        if(this->one_sided_read()) {
            //error
            perror("check_rdma_onesided: one sided read fail");
            return -1;
        }
        printf("One sided read work request posted\n");
        printf("MR contents are: %s\n", mr_addr);
        //now set server's mr and let client do one sided read
        memset(mr_addr, 0, this->get_mr_size());
        memcpy(mr_addr, dummy_data1, dummy_len);
        printf("Sending tcp prep request to client\n");
        if(tcp_transport->send_data(dummy_data1, dummy_len) < 0) {
            perror("check_rdma_onesided: tcp send fail");
            return -1;
        }
        printf("Sent TCP prep request to client\n");
    }
    else {
        //this is the client
        //client will initiate comm
        memset(mr_addr, 0, this->get_mr_size());
        memcpy(mr_addr, dummy_data2, dummy_len);
        printf("Sending tcp prep request to server\n");
        if(tcp_transport->send_data(dummy_data2, dummy_len) < 0) {
            perror("check_rdma_onesided: tcp send fail");
            return -1;
        }
        printf("Sent TCP prep request to server\n");
        //now get server's mr
        if(tcp_transport->recv_data(&buf) < 0) {
            perror("check_rdma_onesided: tcp recv fail");
            free(buf);
            return -1;
        }
        printf("Got server's prep request\n");
        free(buf);
        buf = NULL;
        printf("Initiating RDMA one sided read\n");
        memset(mr_addr, 0, this->get_mr_size());
        if(this->one_sided_read()) {
            //error
            perror("check_rdma_onesided: one sided read fail");
            return -1;
        }
        printf("One sided read request posted\n");
        printf("MR contents are: %s\n", mr_addr);
    }
    printf("========================== END ==========================\n");
    return 0;
}
void RDMA_Transport::set_mr_flags(int flags) {
this->rdma_config->mr.mr_flags = flags;
}
// Copies the NUL-terminated string `src` (without its terminator) to the
// start of the memory region.
//
// Returns 0 on success, -1 if `src` is NULL, the region has not been
// allocated yet, or the string is longer than the region.
//
// Changes: NULL source / unallocated region are rejected instead of crashing
// in memcpy(), and the length is taken with strlen() rather than by building
// a temporary std::string.
int RDMA_Transport::copy_to_mr(const char* src) {
    const size_t mr_size = this->rdma_config->mr.mr_size;
    char* mr_addr = this->rdma_config->mr.mr_buf_addr;
    if(src == NULL || mr_addr == NULL) {
        //error
        return -1;
    }
    const size_t len = strlen(src);
    if(len > mr_size) {
        //error
        return -1;
    }
    memcpy(mr_addr, src, len);
    return 0;
}
// Copies `buf_size` bytes from `buf` to the start of the memory region.
//
// Returns 0 on success, -1 if `buf_size` exceeds the region size or, when
// there is something to copy, `buf` is NULL or the region has not been
// allocated yet (those cases used to crash in memcpy()).
int RDMA_Transport::copy_to_mr(char* buf, size_t buf_size) {
    const size_t mr_size = this->rdma_config->mr.mr_size;
    char* mr_addr = this->rdma_config->mr.mr_buf_addr;
    if(buf_size > mr_size) {
        //error
        return -1;
    }
    if(buf_size == 0) {
        // nothing to copy
        return 0;
    }
    if(buf == NULL || mr_addr == NULL) {
        //error
        return -1;
    }
    memcpy(mr_addr, buf, buf_size);
    return 0;
}
char* RDMA_Transport::get_mr_addr() {
return this->rdma_config->mr.mr_buf_addr;
}
size_t RDMA_Transport::get_mr_size() {
return this->rdma_config->mr.mr_size;
}
// Returns the TCP transport used alongside the RDMA connection. Ownership
// stays with this object.
TCP_Transport* RDMA_Transport::get_tcp_conn() {
    TCP_Transport* tcp = this->tcp_transport;
    return tcp;
}
int RDMA_Transport::rdma_local_setup() {
//sets up stuff for rdma(except the connection part)
if(this->rdma_open_dev()) {
//error
perror("rdma_local_setup: Unable to open dev");
return -1;
}
if(this->rdma_allocate_pd()) {
//error
perror("rdma_local_setup: Unable to allocate pd");
return -1;
}
if(this->rdma_register_mr()) {
//error
perror("rdma_local_setup: Unable to register mr");
return -1;
}
if(this->rdma_init_cq()) {
//error
perror("rdma_local_setup: Unable to init cq");
return -1;
}
if(this->rdma_init_qp()) {
//error
perror("rdma_local_setup: Unable to init qp");
return -1;
}
if(ibv_query_gid(this->rdma_config->dev.ctx,
this->rdma_config->ib_port,
this->rdma_config->gid_idx,
this->rdma_config->gid_struct)) {
perror("rdma_local_setup: unable to query gid");
return -1;
}
memcpy(this->rdma_config->local_conn.gid,
this->rdma_config->gid_struct,
16);
return 0;
}
// Exchanges connection parameters with the peer over the TCP connection.
//
// The local buffer address, rkey, QP number and LID are stored in
// local_conn in network byte order, sent to the peer, and the peer's block is
// received into remote_conn and converted back to host byte order.
//
// Returns 0 on success, -1 if the port query or the TCP exchange fails.
// Change: a failed port query is now reported; previously its result was
// ignored and the LID was read from the attribute struct regardless.
int RDMA_Transport::rdma_sync_config() {
    RDMA_config *rdma_config = this->rdma_config;
    TCP_Transport *tcp_conn = this->tcp_transport;
    int t;
    rdma_config->local_conn.addr = htonll((uintptr_t)rdma_config->mr.mr_buf_addr);
    rdma_config->local_conn.rkey = htonl(rdma_config->mr.mr->rkey);
    rdma_config->local_conn.qp_num = htonl(rdma_config->_qp->qp->qp_num);
    if(this->rdma_query_port() != 0) {
        //error
        perror("rdma_sync_config: Unable to query port");
        return -1;
    }
    rdma_config->local_conn.lid = htons(rdma_config->dev.port_attr->lid);
    t = tcp_conn->send_and_recv((char*) &(rdma_config->local_conn),
                                sizeof(rdma_config->local_conn),
                                (char*) &(rdma_config->remote_conn),
                                sizeof(rdma_config->remote_conn));
    if(t < 0) {
        //error
        perror("rdma_sync_config: Unable to sync rdma info");
        return -1;
    }
    //now we have the remote machine's conn obj; convert it to host order
    rdma_config->remote_conn.addr = ntohll(rdma_config->remote_conn.addr);
    rdma_config->remote_conn.rkey = ntohl(rdma_config->remote_conn.rkey);
    rdma_config->remote_conn.qp_num = ntohl(rdma_config->remote_conn.qp_num);
    rdma_config->remote_conn.lid = ntohs(rdma_config->remote_conn.lid);
    return 0;
}
int RDMA_Transport::get_local_fd() {
return this->tcp_transport->get_local_fd();
}
int RDMA_Transport::get_conn_fd() {
return this->tcp_transport->get_conn_fd();
}
int RDMA_Transport::rdma_setup() {
RDMA_config *config = this->rdma_config;
TCP_Transport *tcp_conn = this->tcp_transport;
int t;
if(this->rdma_local_setup()) {
//error
perror("rdma_setup: Local setup failed");
return -1;
}
if(this->tcp_transport->get_port()==-1) {
//error
perror("rdma_setup: TCP port not set");
return -1;
}
if(tcp_conn->setup()) {
//error
perror("rdma_setup: Unable to setup TCP");
return -1;
}
t = this->rdma_sync_config();
if(t < 0) {
//error
perror("rdma_setup: rdma_sync_config issue");
return -1;
}
// config->local_conn.addr = htonll((uintptr_t)config->mr.mr_buf_addr);
// config->local_conn.rkey = htonl(config->mr.mr->rkey);
// config->local_conn.qp_num = htonl(config->_qp->qp->qp_num);
// this->rdma_query_port();
// config->local_conn.lid = htons(config->dev.port_attr->lid);
// t = tcp_conn->send_and_recv((char*) &(config->local_conn),
// sizeof(config->local_conn),
// (char*) &(config->remote_conn),
// sizeof(config->remote_conn));
// if(t<0) {
// //error
// return -1;
// }
// //now we have the remote machines conn obj
// config->remote_conn.addr = ntohll(config->remote_conn.addr);
// config->remote_conn.rkey = ntohl(config->remote_conn.rkey);
// config->remote_conn.qp_num = ntohl(config->remote_conn.qp_num);
// config->remote_conn.lid = ntohs(config->remote_conn.lid);
//state changes
// if(this->qp_state_to_init()) {
// //error
// perror("rdma_setup: Unable to move QP to init");
// return -1;
// }
// if(this->qp_state_to_rtr()) {
// //error
// perror("rdma_setup: Unable to move QP to rtr");
// return -1;
// }
// if(this->qp_state_to_rts()) {
// //error
// perror("rdma_setup: Unable to move QP to rts");
// return -1;
// }
if(this->qp_from_reset_to_rts()) {
// error
perror("rdma_setup: Error in moving qp states");
return -1;
}
return 0;
}
int RDMA_Transport::rdma_accept_conn_setup() {
RDMA_config *rdma_config = this->rdma_config;
TCP_Transport *tcp_conn = this->tcp_transport;
int t;
if(this->rdma_local_setup()) {
//error
perror("rdma_setup: Local setup failed");
return -1;
}
if(tcp_conn->get_port() < 0) {
// error
perror("rdma_setup: TCP port not set");
return -1;
}
if(tcp_conn->accept_conn()) {
// error
perror("rdma_setup: TCP accept conn failed");
return -1;
}
t = this->rdma_sync_config();
if(t < 0) {
//error
perror("rdma_accept_conn_setup: rdma_sync_config issue");
return -1;
}
//state changes
if(this->qp_from_reset_to_rts()) {
// error
perror("rdma_accept_conn_setup: Error in moving qp states");
return -1;
}
return 0;
}
// Bring-up for a transport whose TCP connection is already established:
// local RDMA setup, exchange of connection parameters over the existing TCP
// connection, and the QP transitions up to RTS.
//
// Returns 0 on success, -1 at the first step that fails.
// Fix: a missing TCP transport now aborts with -1. It used to be logged only
// and then dereferenced during the parameter exchange.
int RDMA_Transport::rdma_setup_no_tcp_setup() {
    if(this->tcp_transport == NULL) {
        //error
        perror("rdma_setup_no_tcp");
        return -1;
    }
    if(this->rdma_local_setup()) {
        //error
        perror("rdma_setup_no_tcp_setup: local setup fail");
        return -1;
    }
    const int t = this->rdma_sync_config();
    if(t < 0) {
        //error
        perror("rdma_setup_no_tcp_setup: sync config fail");
        return -1;
    }
    //state changes
    if(this->qp_from_reset_to_rts()) {
        // error
        perror("rdma_setup_no_tcp_setup: state changes fail");
        return -1;
    }
    return 0;
}
#endif
\ No newline at end of file
#ifndef __TRANSPORT_CONFIG_H__
#define __TRANSPORT_CONFIG_H__
#include <unistd.h>
#include <string>
#include <infiniband/verbs.h>
// Identifies the kind of transport to use.
// NOTE(review): presumably selected by the TRANSPORT_TYPE key of the config
// files (e.g. TCP, RDMA_RC) - confirm against the config reader.
enum Transport_Type {
TCP_IP_TRANSPORT,
UDP_TRANSPORT,
RDMA_RC_TRANSPORT,
RDMA_UC_TRANSPORT
};
// Plain holder for the state of one TCP endpoint.
//
//  ip            - peer address; empty means "act as server".
//  port          - TCP port, -1 when unset.
//  conn_fd       - descriptor used for send/recv, -1 when not connected.
//  local_sock_fd - descriptor of the locally created socket, -1 when none.
//
// The destructor closes both descriptors. On the client side both fields
// hold the same descriptor, so it is closed only once (it used to be closed
// twice). As before, only descriptors > 0 are closed.
//
// The copy constructor now takes a const reference, so const objects and
// temporaries can be copied too. A copy shares the descriptor numbers with
// its source; only one of the two should remain responsible for closing them.
struct TCP_config {
    std::string ip;
    int port;
    int conn_fd;
    int local_sock_fd;

    TCP_config()
        : ip(), port(-1), conn_fd(-1), local_sock_fd(-1) {}

    TCP_config(int b)
        : ip(), port(b), conn_fd(-1), local_sock_fd(-1) {}

    TCP_config(std::string a, int b)
        : ip(a), port(b), conn_fd(-1), local_sock_fd(-1) {}

    TCP_config(const TCP_config& t)
        : ip(t.ip), port(t.port), conn_fd(t.conn_fd), local_sock_fd(t.local_sock_fd) {}

    ~TCP_config() {
        const int conn = this->conn_fd;
        const int local = this->local_sock_fd;
        this->ip.clear();
        this->port = -1;
        if(conn > 0) {
            close(conn);
        }
        if(local > 0 && local != conn) {
            close(local);
        }
        this->conn_fd = -1;
        this->local_sock_fd = -1;
    }
};
// Connection parameters of one side of an RDMA connection. The struct is
// exchanged with the peer as raw bytes (see rdma_sync_config), so the member
// order and types must not change.
struct rdma_conn {
    uint64_t addr;    // buffer address
    uint32_t rkey;    // remote key
    uint32_t qp_num;  // queue pair number
    uint16_t lid;     // local id
    uint8_t gid[16];  // global id

    // Starts with every field zeroed.
    rdma_conn() : addr(0), rkey(0), qp_num(0), lid(0) {
        memset(gid, 0, sizeof(gid));
    }

    // Wipes every field again on destruction.
    ~rdma_conn() {
        memset(gid, 0, sizeof(gid));
        lid = 0;
        qp_num = 0;
        rkey = 0;
        addr = 0;
    }
};
// State of the opened RDMA device.
//
//  ctx       - device context from ibv_open_device(), NULL until opened.
//  dev_attr  - storage for ibv_query_device() results (owned, from `new`).
//  port_attr - storage for ibv_query_port() results (owned, from `new`).
//  dev_num   - index into the device list of the device to open.
//
// Fixes in the destructor:
//  - dev_attr / port_attr are allocated with `new`, so they are now released
//    with `delete` rather than free().
//  - the context is only closed with ibv_close_device(); it is no longer also
//    passed to free() afterwards.
struct rdma_dev {
    struct ibv_context *ctx;
    struct ibv_device_attr *dev_attr;
    struct ibv_port_attr *port_attr;
    int dev_num;

    rdma_dev() {
        this->ctx = NULL;
        this->dev_attr = new ibv_device_attr();
        this->port_attr = new ibv_port_attr();
        this->dev_num = 0; //default value
    }

    ~rdma_dev() {
        if(this->ctx) {
            //close the device; this also releases the context
            if(ibv_close_device(this->ctx)) {
                perror("Unable to close device");
            }
            this->ctx = NULL;
        }
        delete this->dev_attr;
        this->dev_attr = NULL;
        delete this->port_attr;
        this->port_attr = NULL;
    }
};
// The registered memory region and its backing buffer.
//
//  mr_buf_addr - malloc()ed buffer backing the region (owned), or NULL.
//  mr_size     - size of the buffer in bytes.
//  mr          - handle from ibv_reg_mr(), or NULL when not registered.
//  mr_flags    - access flags used for registration; defaults to
//                local write + remote read + remote write.
//
// Fixes in the destructor:
//  - ibv_dereg_mr() is only called when a region was actually registered
//    (it used to be called with NULL for a never-registered region).
//  - the handle is no longer passed to free() after ibv_dereg_mr().
struct rdma_mr {
    char* mr_buf_addr;
    size_t mr_size;
    struct ibv_mr *mr;
    int mr_flags;

    rdma_mr() {
        this->mr_buf_addr = NULL;
        this->mr_size = 0;
        this->mr = NULL;
        this->mr_flags = IBV_ACCESS_REMOTE_WRITE |
                         IBV_ACCESS_REMOTE_READ |
                         IBV_ACCESS_LOCAL_WRITE;
    }

    ~rdma_mr() {
        // deregister first: the buffer must stay valid while it is registered
        if(this->mr) {
            if(ibv_dereg_mr(this->mr)) {
                //error statement
                perror("Unable to deregister MR");
            }
            this->mr = NULL;
        }
        if(this->mr_buf_addr) {
            free(this->mr_buf_addr);
            this->mr_buf_addr = NULL;
        }
    }
};
struct cq_config {
struct ibv_cq* cq;
int depth;
void* cq_context;
ibv_comp_channel *channel;
int comp_vector;
cq_config() {
this->cq = NULL;
this->depth = 3; //some default value
this->cq_context = NULL;
this->channel = NULL;
this->comp_vector = 0;
}
~cq_config() {
if(this->cq) {
if(ibv_destroy_cq(this->cq)) {
//error
perror("Unable to destroy cq");
}
this->cq = NULL;
//delete(this->cq);
}
if(this->cq_context) {
free(this->cq_context);
this->cq_context = NULL;
}
if(this->channel) {
if(ibv_destroy_comp_channel(this->channel)) {
//error
perror("Unable to destroy comp_channel");
}
this->channel = NULL;
}
}
};
// Queue-pair handle together with the attributes used to create and query it.
//
//  qp              - created QP, NULL until created.
//  qp_ctx          - user context pointer (free()d by the destructor if set).
//  qp_attr         - filled by ibv_query_qp().
//  qp_init_attr    - creation attributes; defaults: both CQs unset, every
//                    send signaled, 2 work requests and 1 SGE per queue,
//                    2 bytes of inline data, no SRQ.
//  qp_access_flags - access flags applied in the INIT transition; defaults to
//                    local write + remote read + remote write.
//
// Fixes:
//  - the destructor only calls ibv_destroy_qp() when a QP exists (it used to
//    be called with NULL when no QP had been created);
//  - the constructor taking a QP type now initialises qp_access_flags as
//    well (it was left uninitialised);
//  - qp_attr is zeroed on construction.
// The capability defaults are assigned field by field rather than with
// designated initializers, which need C++20 or a compiler extension.
struct qp_config {
    struct ibv_qp *qp;
    void* qp_ctx;
    struct ibv_qp_attr qp_attr;
    struct ibv_qp_init_attr qp_init_attr;
    int qp_access_flags;

    // Reliable-connection QP with the defaults listed above.
    qp_config() : qp_config(IBV_QPT_RC) {}

    // Same defaults, but for the given QP type.
    qp_config(enum ibv_qp_type qp_type) {
        this->qp = NULL;
        this->qp_ctx = NULL;
        this->qp_access_flags = IBV_ACCESS_REMOTE_WRITE |
                                IBV_ACCESS_REMOTE_READ |
                                IBV_ACCESS_LOCAL_WRITE;
        memset(&this->qp_attr, 0, sizeof(this->qp_attr));
        memset(&this->qp_init_attr, 0, sizeof(this->qp_init_attr));
        this->qp_init_attr.qp_context = this->qp_ctx;
        this->qp_init_attr.send_cq = NULL;
        this->qp_init_attr.recv_cq = NULL;
        this->qp_init_attr.srq = NULL;
        this->qp_init_attr.qp_type = qp_type;
        this->qp_init_attr.sq_sig_all = 1;
        this->qp_init_attr.cap.max_send_wr = 2;
        this->qp_init_attr.cap.max_recv_wr = 2;
        this->qp_init_attr.cap.max_send_sge = 1;
        this->qp_init_attr.cap.max_recv_sge = 1;
        this->qp_init_attr.cap.max_inline_data = 2;
    }

    ~qp_config() {
        //check if qp in error state
        //if not, move qp to error state
        if(this->qp) {
            if(ibv_destroy_qp(this->qp)) {
                perror("Unable to destroy qp");
            }
            this->qp = NULL;
        }
        if(this->qp_ctx) {
            free(this->qp_ctx);
            this->qp_ctx = NULL;
        }
    }
};
// Aggregates everything needed for one RDMA connection: the local and remote
// connection parameters, the device, the memory region, the protection
// domain, the queue pair / completion queue wrappers and the tunables used
// by the QP state transitions.
//
// Fix: gid_struct is allocated with `new`, so it is now released with
// `delete` instead of free(). The _qp / _cq pointers are also cleared after
// being deleted.
//
// NOTE(review): the destructor body deallocates the PD before the `mr`
// member's destructor has deregistered the memory region (members are
// destroyed after the body runs), so ibv_dealloc_pd() may fail while a
// region is still registered - confirm and reorder if needed.
struct RDMA_config {
    // NOTE(review): this only forward-declares a nested RDMA_config::TCP_config
    // type; it does not add a member. Kept as-is.
    struct TCP_config;
    struct rdma_conn remote_conn;
    struct rdma_conn local_conn;
    struct rdma_dev dev;
    struct rdma_mr mr;
    struct ibv_pd *pd;
    struct qp_config *_qp;
    struct cq_config *_cq;
    union ibv_gid *gid_struct;
    enum ibv_mtu mtu;
    int min_rnr_timer;
    int timeout;
    int retry_cnt;
    int ib_port;
    int gid_idx;
    enum ibv_qp_state curr_state;

    RDMA_config() {
        //fill with some default values
        this->pd = NULL;
        this->_qp = new qp_config();
        this->_cq = new cq_config();
        this->gid_struct = new ibv_gid();
        this->mtu = IBV_MTU_512;
        this->min_rnr_timer = 12;
        this->timeout = 12;
        this->retry_cnt = 4;
        this->ib_port = 1;
        this->gid_idx = 1;
        this->curr_state = IBV_QPS_UNKNOWN;
    }

    ~RDMA_config() {
        // the QP goes first, then the CQ it uses
        if(this->_qp) {
            delete(this->_qp);
            this->_qp = NULL;
        }
        if(this->_cq) {
            delete(this->_cq);
            this->_cq = NULL;
        }
        if(this->pd) {
            if(ibv_dealloc_pd(this->pd)) {
                perror("Unable to dealloc PD");
            }
            this->pd = NULL;
        }
        if(this->gid_struct) {
            delete this->gid_struct;
            this->gid_struct = NULL;
        }
    }
};
// Declaration of the TCP transport class. Only the interface is visible
// here; every member function is defined in another translation unit, so the
// notes below describe the declarations and hedge anything inferred from
// names alone.
class TCP_Transport {
protected:
// Pointer to the TCP configuration used by this transport.
// NOTE(review): ownership of this pointer is not visible here - confirm in
// the constructor/destructor definitions.
TCP_config *config;
public:
// Buffer pointer and its size, publicly accessible.
// NOTE(review): presumably the buffer used by send_data()/recv_data() and
// set through set_mr() - confirm against the definitions.
char* mr;
size_t mr_size;
// Constructors: default, (string, int), (int) and from an existing
// TCP_config. NOTE(review): `a` / `b` look like an address and a port -
// confirm against the definitions.
TCP_Transport();
TCP_Transport(std::string a, int b);
TCP_Transport(int b);
TCP_Transport(TCP_config& t);
~TCP_Transport();
// Accessors for the configured address and port.
std::string get_ip();
int get_port();
void set_ip(std::string addr);
void set_ip(char* addr);
void set_port(int port);
// Two overloads for setting the buffer: from a raw pointer or a string.
void set_mr(char* buf_addr, size_t buf_size);
void set_mr(std::string str, size_t buf_size);
// Socket / connection management. All return int.
// NOTE(review): the meaning of the return values is not visible here.
int make_socket();
int start_listen();
int make_socket_listen();
int accept_conn();
int make_conn();
int setup();
// Data transfer overloads.
int send_data(char* buf, int size);
int send_data(std::string str, int size);
int send_data();
int recv_data(char* buf, int size);
// NOTE(review): the parameter name suggests the callee fills in a buffer
// pointer for the caller - confirm who allocates and frees it.
ssize_t recv_data(char** null_buffer);
int send_and_recv(char* send_buf, int send_size, char* recv_buf, int recv_size);
// File descriptor accessors (local and connection descriptors).
void set_local_fd(int fd);
void set_conn_fd(int fd);
int get_local_fd();
int get_conn_fd();
int close_conn();
};
// Declaration of the RDMA transport class. It holds a TCP_Transport pointer
// and exposes an RDMA_config pointer; all member functions are defined in
// another translation unit, so behaviour beyond what the declarations show
// is hedged below.
class RDMA_Transport {
protected:
// NOTE(review): presumably the side channel used to exchange connection
// data with the peer - confirm; ownership is not visible here.
TCP_Transport *tcp_transport;
public:
// Publicly accessible RDMA configuration/state for this transport.
RDMA_config *rdma_config;
// Constructors: default, (string, int) and from an existing TCP_Transport.
RDMA_Transport();
RDMA_Transport(std::string a, int b);
RDMA_Transport(TCP_Transport *tcp_conn);
~RDMA_Transport();
//debug functions
RDMA_config* get_config();
void fill_default_parameters();
//required functions
int rdma_setup(); //sets up everything including the connection
int rdma_setup_no_tcp_setup();
int rdma_accept_conn_setup();
int rdma_local_setup(); //sets up whatever can be achieved locally
int rdma_sync_config();
void set_conn_ip(std::string ip);
void set_tcp_port(int port);
// Individual resource setup steps (device, protection domain, memory
// region, completion queue, queue pair, port and GID queries).
int rdma_open_dev();
int rdma_allocate_pd();
void set_mr_size(size_t size);
int rdma_register_mr();
int rdma_init_cq();
int rdma_init_qp();
int rdma_query_port();
bool rdma_port_isactive();
int rdma_query_gid();
void set_mr_flags(int flags);
// Memory region helpers.
int copy_to_mr(const char* src);
int copy_to_mr(char* buf, size_t buf_size);
char* get_mr_addr();
size_t get_mr_size();
// Access to the TCP_Transport and its file descriptors.
TCP_Transport* get_tcp_conn();
int get_local_fd();
int get_conn_fd();
// Queue pair state transitions, one function per target state.
int qp_state_to_reset();
int qp_state_to_init();
int qp_state_to_rtr();
int qp_state_to_rts();
int qp_state_to_sqd();
int qp_state_to_error();
int qp_from_reset_to_rts();
int rdma_query_qp(enum ibv_qp_attr_mask mask);
enum ibv_qp_state get_curr_qp_state();
// Connection checks and data-path operations.
int check_rdma_conn();
int check_rdma_onesided();
int one_sided_read(); //RDMA_READ
int one_sided_write(); //RDMA_WRITE
int two_sided_send(); //WR_SEND
int two_sided_recv(); //RR
int poll_cq();
};
#endif
\ No newline at end of file
#include <iostream>
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string>
#include <infiniband/verbs.h>
#include "rdma_helper.hpp"
#include "rdma_states.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
/**
 * Writes `size` bytes starting at `obj` to base->conn_fd.
 *
 * write() is allowed to accept fewer bytes than requested, so the call is
 * repeated from the first unsent byte until everything has been written or
 * write() reports an error / makes no progress.
 *
 * @param base  resource holder providing the connection descriptor conn_fd
 * @param obj   buffer to send
 * @param size  number of bytes to send; a value <= 0 sends nothing and succeeds
 * @return 0 when all `size` bytes were written, 1 otherwise
 */
int send_obj(struct resource_base *base, char *obj, int size) {
    int sent = 0;
    while(sent < size) {
        ssize_t op_bytes = write(base->conn_fd, obj + sent, size - sent);
        // <= 0 covers both an error (-1) and a zero-byte write, which would
        // otherwise spin forever.
        if(op_bytes <= 0) break;
        sent += static_cast<int>(op_bytes);
    }
    if(sent < size) {
        D(err_msg("write: Unable to write", false, base); return 1);
    }
    return 0;
}
/**
 * Reads exactly `size` bytes from base->conn_fd into `ret_obj`.
 *
 * read() may return fewer bytes than requested, so each call continues at
 * the first unfilled byte and asks only for the bytes still missing. This
 * keeps earlier partial data intact and never consumes more than `size`
 * bytes from the descriptor.
 *
 * @param base     resource holder providing the connection descriptor conn_fd
 * @param ret_obj  destination buffer with room for at least `size` bytes
 * @param size     number of bytes to read
 * @return 0 when `size` bytes were read, 1 on end-of-file or read error
 *         before that
 */
int read_obj(struct resource_base *base, char *ret_obj, int size) {
    int tot_bytes = 0;
    while(tot_bytes < size) {
        ssize_t op_bytes = read(base->conn_fd, ret_obj + tot_bytes, size - tot_bytes);
        // 0 means the peer closed the connection, < 0 is an error; in both
        // cases no further data will arrive.
        if(op_bytes <= 0) break;
        tot_bytes += static_cast<int>(op_bytes);
    }
    if(tot_bytes < size) {
        D(err_msg("read; Unable to read", false, base); return 1);
    }
    return 0;
}
/**
 * Sends a request and then waits for the reply on the same resource.
 *
 * First passes `snd_obj` / `send_size` to send_obj(); if that succeeds,
 * passes `ret_obj` / `ret_size` to read_obj(). The read is not attempted
 * when the send fails.
 *
 * @return 0 when both steps return 0, 1 as soon as either step fails
 */
int send_and_check(struct resource_base *base, char *snd_obj, char *ret_obj, int send_size, int ret_size) {
    const int send_status = send_obj(base, snd_obj, send_size);
    if(send_status != 0) {
        D(err_msg("send_and_check;send_obj", false, base); return 1);
    }

    const int read_status = read_obj(base, ret_obj, ret_size);
    if(read_status != 0) {
        D(err_msg("send_and_check;read_obj", false, base); return 1);
    }

    return 0;
}
#include "rdma_helper.hpp"
#ifndef __TRANSPORT_HELPER__
#define __TRANSPORT_HELPER__
// Helpers for moving raw byte buffers over the connection descriptor held
// in a resource_base. Each function returns 0 on success and 1 on failure.

// Writes `size` bytes from `obj`; fails if fewer than `size` bytes could be
// written.
int send_obj(struct resource_base *base, char *obj, int size);

// Reads `size` bytes into `ret_obj`; fails if fewer than `size` bytes could
// be read.
int read_obj(struct resource_base *base, char *ret_obj, int size);

// Calls send_obj() with `snd_obj` / `send_size`, then read_obj() with
// `ret_obj` / `ret_size`; fails as soon as either step fails.
int send_and_check(struct resource_base *base, char *snd_obj, char *ret_obj, int send_size, int ret_size);
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment