Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
hpdos
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
SYNERG
hpdos
Commits
d2d47b86
Commit
d2d47b86
authored
Jul 15, 2022
by
Smit Gangurde
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
RAMCloud Offload, Smit MTP
parent
33075e8b
Changes
88
Hide whitespace changes
Inline
Side-by-side
Showing
88 changed files
with
13383 additions
and
0 deletions
+13383
-0
Smit_MTP_RamCloud_Replication_Offload/.gitignore
Smit_MTP_RamCloud_Replication_Offload/.gitignore
+6
-0
Smit_MTP_RamCloud_Replication_Offload/ErrMssg.h
Smit_MTP_RamCloud_Replication_Offload/ErrMssg.h
+6
-0
Smit_MTP_RamCloud_Replication_Offload/Offloaded/RDMA/config.conf
...P_RamCloud_Replication_Offload/Offloaded/RDMA/config.conf
+2
-0
Smit_MTP_RamCloud_Replication_Offload/Offloaded/TCP/config_file.conf
...mCloud_Replication_Offload/Offloaded/TCP/config_file.conf
+2
-0
Smit_MTP_RamCloud_Replication_Offload/Readme.txt
Smit_MTP_RamCloud_Replication_Offload/Readme.txt
+19
-0
Smit_MTP_RamCloud_Replication_Offload/ReplicaManager.cpp
Smit_MTP_RamCloud_Replication_Offload/ReplicaManager.cpp
+0
-0
Smit_MTP_RamCloud_Replication_Offload/ReplicaManager.h
Smit_MTP_RamCloud_Replication_Offload/ReplicaManager.h
+8
-0
Smit_MTP_RamCloud_Replication_Offload/client.cpp
Smit_MTP_RamCloud_Replication_Offload/client.cpp
+112
-0
Smit_MTP_RamCloud_Replication_Offload/config/client_config.conf
...TP_RamCloud_Replication_Offload/config/client_config.conf
+15
-0
Smit_MTP_RamCloud_Replication_Offload/config/config_parameters.hpp
...RamCloud_Replication_Offload/config/config_parameters.hpp
+55
-0
Smit_MTP_RamCloud_Replication_Offload/config/master_server_config.conf
...loud_Replication_Offload/config/master_server_config.conf
+0
-0
Smit_MTP_RamCloud_Replication_Offload/config/nic_config.conf
Smit_MTP_RamCloud_Replication_Offload/config/nic_config.conf
+14
-0
Smit_MTP_RamCloud_Replication_Offload/config/read_config.cc
Smit_MTP_RamCloud_Replication_Offload/config/read_config.cc
+288
-0
Smit_MTP_RamCloud_Replication_Offload/config/read_config.hpp
Smit_MTP_RamCloud_Replication_Offload/config/read_config.hpp
+56
-0
Smit_MTP_RamCloud_Replication_Offload/config/read_configTest.cc
...TP_RamCloud_Replication_Offload/config/read_configTest.cc
+15
-0
Smit_MTP_RamCloud_Replication_Offload/config/server_config.conf
...TP_RamCloud_Replication_Offload/config/server_config.conf
+14
-0
Smit_MTP_RamCloud_Replication_Offload/config/threaded_client.conf
..._RamCloud_Replication_Offload/config/threaded_client.conf
+16
-0
Smit_MTP_RamCloud_Replication_Offload/experiments/client.cpp
Smit_MTP_RamCloud_Replication_Offload/experiments/client.cpp
+156
-0
Smit_MTP_RamCloud_Replication_Offload/experiments/non_offloaded.cpp
...amCloud_Replication_Offload/experiments/non_offloaded.cpp
+153
-0
Smit_MTP_RamCloud_Replication_Offload/experiments/nonoffloaded_reject.cpp
...d_Replication_Offload/experiments/nonoffloaded_reject.cpp
+141
-0
Smit_MTP_RamCloud_Replication_Offload/experiments/offloaded_reject.cpp
...loud_Replication_Offload/experiments/offloaded_reject.cpp
+168
-0
Smit_MTP_RamCloud_Replication_Offload/experiments/server.cpp
Smit_MTP_RamCloud_Replication_Offload/experiments/server.cpp
+166
-0
Smit_MTP_RamCloud_Replication_Offload/final_shashank/backup.cpp
...TP_RamCloud_Replication_Offload/final_shashank/backup.cpp
+666
-0
Smit_MTP_RamCloud_Replication_Offload/final_shashank/client.cpp
...TP_RamCloud_Replication_Offload/final_shashank/client.cpp
+582
-0
Smit_MTP_RamCloud_Replication_Offload/final_shashank/master.cpp
...TP_RamCloud_Replication_Offload/final_shashank/master.cpp
+574
-0
Smit_MTP_RamCloud_Replication_Offload/final_shashank/nic.cpp
Smit_MTP_RamCloud_Replication_Offload/final_shashank/nic.cpp
+700
-0
Smit_MTP_RamCloud_Replication_Offload/final_shashank/offload_master.cpp
...oud_Replication_Offload/final_shashank/offload_master.cpp
+666
-0
Smit_MTP_RamCloud_Replication_Offload/final_shashank/utils.cpp
...MTP_RamCloud_Replication_Offload/final_shashank/utils.cpp
+102
-0
Smit_MTP_RamCloud_Replication_Offload/final_shashank/utils.h
Smit_MTP_RamCloud_Replication_Offload/final_shashank/utils.h
+35
-0
Smit_MTP_RamCloud_Replication_Offload/include/Buffer.cc
Smit_MTP_RamCloud_Replication_Offload/include/Buffer.cc
+107
-0
Smit_MTP_RamCloud_Replication_Offload/include/Buffer.hpp
Smit_MTP_RamCloud_Replication_Offload/include/Buffer.hpp
+39
-0
Smit_MTP_RamCloud_Replication_Offload/include/ThreadsafeQueueTest.cc
...mCloud_Replication_Offload/include/ThreadsafeQueueTest.cc
+34
-0
Smit_MTP_RamCloud_Replication_Offload/include/cli_api.cc
Smit_MTP_RamCloud_Replication_Offload/include/cli_api.cc
+150
-0
Smit_MTP_RamCloud_Replication_Offload/include/cli_api.hpp
Smit_MTP_RamCloud_Replication_Offload/include/cli_api.hpp
+19
-0
Smit_MTP_RamCloud_Replication_Offload/include/cli_apiTest.cc
Smit_MTP_RamCloud_Replication_Offload/include/cli_apiTest.cc
+26
-0
Smit_MTP_RamCloud_Replication_Offload/include/client_functions.cc
..._RamCloud_Replication_Offload/include/client_functions.cc
+349
-0
Smit_MTP_RamCloud_Replication_Offload/include/client_functions.hpp
...RamCloud_Replication_Offload/include/client_functions.hpp
+46
-0
Smit_MTP_RamCloud_Replication_Offload/include/common.cc
Smit_MTP_RamCloud_Replication_Offload/include/common.cc
+65
-0
Smit_MTP_RamCloud_Replication_Offload/include/common.hpp
Smit_MTP_RamCloud_Replication_Offload/include/common.hpp
+460
-0
Smit_MTP_RamCloud_Replication_Offload/include/connection_pool.cc
...P_RamCloud_Replication_Offload/include/connection_pool.cc
+319
-0
Smit_MTP_RamCloud_Replication_Offload/include/connection_pool.hpp
..._RamCloud_Replication_Offload/include/connection_pool.hpp
+47
-0
Smit_MTP_RamCloud_Replication_Offload/include/dispatcher.cc
Smit_MTP_RamCloud_Replication_Offload/include/dispatcher.cc
+341
-0
Smit_MTP_RamCloud_Replication_Offload/include/dispatcher.hpp
Smit_MTP_RamCloud_Replication_Offload/include/dispatcher.hpp
+35
-0
Smit_MTP_RamCloud_Replication_Offload/include/general_threadsafe_queue.hpp
..._Replication_Offload/include/general_threadsafe_queue.hpp
+98
-0
Smit_MTP_RamCloud_Replication_Offload/include/hash.cc
Smit_MTP_RamCloud_Replication_Offload/include/hash.cc
+15
-0
Smit_MTP_RamCloud_Replication_Offload/include/hash.hpp
Smit_MTP_RamCloud_Replication_Offload/include/hash.hpp
+9
-0
Smit_MTP_RamCloud_Replication_Offload/include/monitor.cc
Smit_MTP_RamCloud_Replication_Offload/include/monitor.cc
+207
-0
Smit_MTP_RamCloud_Replication_Offload/include/monitor.hpp
Smit_MTP_RamCloud_Replication_Offload/include/monitor.hpp
+84
-0
Smit_MTP_RamCloud_Replication_Offload/include/queue_context.hpp
...TP_RamCloud_Replication_Offload/include/queue_context.hpp
+89
-0
Smit_MTP_RamCloud_Replication_Offload/include/thread_functions.cc
..._RamCloud_Replication_Offload/include/thread_functions.cc
+1140
-0
Smit_MTP_RamCloud_Replication_Offload/include/thread_functions.hpp
...RamCloud_Replication_Offload/include/thread_functions.hpp
+30
-0
Smit_MTP_RamCloud_Replication_Offload/include/thread_pool.cc
Smit_MTP_RamCloud_Replication_Offload/include/thread_pool.cc
+69
-0
Smit_MTP_RamCloud_Replication_Offload/include/thread_pool.hpp
..._MTP_RamCloud_Replication_Offload/include/thread_pool.hpp
+37
-0
Smit_MTP_RamCloud_Replication_Offload/include/threadsafe_queue.cc
..._RamCloud_Replication_Offload/include/threadsafe_queue.cc
+83
-0
Smit_MTP_RamCloud_Replication_Offload/include/threadsafe_queue.hpp
...RamCloud_Replication_Offload/include/threadsafe_queue.hpp
+32
-0
Smit_MTP_RamCloud_Replication_Offload/integrated_client.cc
Smit_MTP_RamCloud_Replication_Offload/integrated_client.cc
+113
-0
Smit_MTP_RamCloud_Replication_Offload/integrated_job_clientTest.cc
...RamCloud_Replication_Offload/integrated_job_clientTest.cc
+41
-0
Smit_MTP_RamCloud_Replication_Offload/integrated_job_serverTest.cc
...RamCloud_Replication_Offload/integrated_job_serverTest.cc
+75
-0
Smit_MTP_RamCloud_Replication_Offload/integrated_nic.cc
Smit_MTP_RamCloud_Replication_Offload/integrated_nic.cc
+108
-0
Smit_MTP_RamCloud_Replication_Offload/integrated_receiverTest.cc
...P_RamCloud_Replication_Offload/integrated_receiverTest.cc
+56
-0
Smit_MTP_RamCloud_Replication_Offload/integrated_senderTest.cc
...MTP_RamCloud_Replication_Offload/integrated_senderTest.cc
+49
-0
Smit_MTP_RamCloud_Replication_Offload/integrated_server.cc
Smit_MTP_RamCloud_Replication_Offload/integrated_server.cc
+96
-0
Smit_MTP_RamCloud_Replication_Offload/metadata.hpp
Smit_MTP_RamCloud_Replication_Offload/metadata.hpp
+171
-0
Smit_MTP_RamCloud_Replication_Offload/nonoffloaded_client.cpp
..._MTP_RamCloud_Replication_Offload/nonoffloaded_client.cpp
+69
-0
Smit_MTP_RamCloud_Replication_Offload/nonoffloaded_server.cc
Smit_MTP_RamCloud_Replication_Offload/nonoffloaded_server.cc
+96
-0
Smit_MTP_RamCloud_Replication_Offload/nonoffloaded_server.cpp
..._MTP_RamCloud_Replication_Offload/nonoffloaded_server.cpp
+164
-0
Smit_MTP_RamCloud_Replication_Offload/nonoffloadedclient.cpp
Smit_MTP_RamCloud_Replication_Offload/nonoffloadedclient.cpp
+92
-0
Smit_MTP_RamCloud_Replication_Offload/nonoffloadedserver.cpp
Smit_MTP_RamCloud_Replication_Offload/nonoffloadedserver.cpp
+178
-0
Smit_MTP_RamCloud_Replication_Offload/rdma_helper.cpp
Smit_MTP_RamCloud_Replication_Offload/rdma_helper.cpp
+405
-0
Smit_MTP_RamCloud_Replication_Offload/rdma_helper.hpp
Smit_MTP_RamCloud_Replication_Offload/rdma_helper.hpp
+88
-0
Smit_MTP_RamCloud_Replication_Offload/rdma_states.cpp
Smit_MTP_RamCloud_Replication_Offload/rdma_states.cpp
+119
-0
Smit_MTP_RamCloud_Replication_Offload/rdma_states.hpp
Smit_MTP_RamCloud_Replication_Offload/rdma_states.hpp
+13
-0
Smit_MTP_RamCloud_Replication_Offload/rep_offload_server.cpp
Smit_MTP_RamCloud_Replication_Offload/rep_offload_server.cpp
+0
-0
Smit_MTP_RamCloud_Replication_Offload/server.cpp
Smit_MTP_RamCloud_Replication_Offload/server.cpp
+104
-0
Smit_MTP_RamCloud_Replication_Offload/server_nic.cpp
Smit_MTP_RamCloud_Replication_Offload/server_nic.cpp
+199
-0
Smit_MTP_RamCloud_Replication_Offload/testing/request_test_host.cc
...RamCloud_Replication_Offload/testing/request_test_host.cc
+90
-0
Smit_MTP_RamCloud_Replication_Offload/testing/request_test_nic.cc
..._RamCloud_Replication_Offload/testing/request_test_nic.cc
+83
-0
Smit_MTP_RamCloud_Replication_Offload/threaded_client.cc
Smit_MTP_RamCloud_Replication_Offload/threaded_client.cc
+137
-0
Smit_MTP_RamCloud_Replication_Offload/threaded_client1.cc
Smit_MTP_RamCloud_Replication_Offload/threaded_client1.cc
+161
-0
Smit_MTP_RamCloud_Replication_Offload/transport_api/ClientTest.cc
..._RamCloud_Replication_Offload/transport_api/ClientTest.cc
+26
-0
Smit_MTP_RamCloud_Replication_Offload/transport_api/RDMATest.cc
...TP_RamCloud_Replication_Offload/transport_api/RDMATest.cc
+17
-0
Smit_MTP_RamCloud_Replication_Offload/transport_api/ServerTest.cc
..._RamCloud_Replication_Offload/transport_api/ServerTest.cc
+32
-0
Smit_MTP_RamCloud_Replication_Offload/transport_api/TCPTestClient.cc
...mCloud_Replication_Offload/transport_api/TCPTestClient.cc
+25
-0
Smit_MTP_RamCloud_Replication_Offload/transport_api/TCPTestServer.cc
...mCloud_Replication_Offload/transport_api/TCPTestServer.cc
+23
-0
Smit_MTP_RamCloud_Replication_Offload/transport_api/transport_config.cc
...oud_Replication_Offload/transport_api/transport_config.cc
+1405
-0
Smit_MTP_RamCloud_Replication_Offload/transport_api/transport_config.hpp
...ud_Replication_Offload/transport_api/transport_config.hpp
+417
-0
Smit_MTP_RamCloud_Replication_Offload/transport_helper.cpp
Smit_MTP_RamCloud_Replication_Offload/transport_helper.cpp
+52
-0
Smit_MTP_RamCloud_Replication_Offload/transport_helper.hpp
Smit_MTP_RamCloud_Replication_Offload/transport_helper.hpp
+12
-0
No files found.
Smit_MTP_RamCloud_Replication_Offload/.gitignore
0 → 100644
View file @
d2d47b86
*.gch
*.out
*.o
*code-workspace
.vscode
.*~
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/ErrMssg.h
0 → 100644
View file @
d2d47b86
// Include guard for the D() helper macro.
#ifndef ERR_MSSG_H
#define ERR_MSSG_H
// D(x): expands x inside a do { ... } while(0) wrapper so that an invocation
// followed by ';' behaves as exactly one statement (e.g. after an unbraced if).
#define D(x) do{x;}while(0)
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/Offloaded/RDMA/config.conf
0 → 100644
View file @
d2d47b86
TRANSPORT_TYPE
=
RDMA_RC
NUM_THREADS
=
8
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/Offloaded/TCP/config_file.conf
0 → 100644
View file @
d2d47b86
TRANSPORT_TYPE
=
TCP
NUM_THREADS
=
8
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/Readme.txt
0 → 100644
View file @
d2d47b86
###############_Work in Progress_###############
Things to look out for: the CPU speed difference between the host and the NIC can cause issues when programming with RDMA.
Current issue: memory leak when a connection exits.
The code keeps creating buffers for messages from dead connections (why is select even returning dead connections?).
Fix to be implemented: close connections cleanly.
Compilation commands:
NIC:
g++ -g integrated_nic.cc include/common.cc transport_api/transport_config.cc config/read_config.cc include/connection_pool.cc include/thread_pool.cc include/threadsafe_queue.cc include/log.cc include/thread_functions.cc include/dispatcher.cc include/client_functions.cc include/hash.cc include/Buffer.cc include/cli_api.cc -libverbs -lpthread -mcmodel=small
SERVER:
g++ -g integrated_server.cc include/common.cc transport_api/transport_config.cc config/read_config.cc include/connection_pool.cc include/thread_pool.cc include/threadsafe_queue.cc include/log.cc include/thread_functions.cc include/cli_api.cc include/dispatcher.cc include/client_functions.cc include/hash.cc include/Buffer.cc -libverbs -lpthread -mcmodel=medium
CLIENT:
g++ -g threaded_client1.cc include/common.cc transport_api/transport_config.cc config/read_config.cc include/connection_pool.cc include/thread_pool.cc include/threadsafe_queue.cc include/log.cc include/thread_functions.cc include/dispatcher.cc include/client_functions.cc include/cli_api.cc include/hash.cc include/Buffer.cc -libverbs -lpthread -mcmodel=medium
Smit_MTP_RamCloud_Replication_Offload/ReplicaManager.cpp
0 → 100644
View file @
d2d47b86
Smit_MTP_RamCloud_Replication_Offload/ReplicaManager.h
0 → 100644
View file @
d2d47b86
// Placeholder for the replica manager; it declares no members yet.
// The class definition must end with ';' or every includer fails to compile.
class ReplicaManager
{
public:
private:
};
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/client.cpp
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <sys/time.h>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
// Compile-time settings for this RDMA test client.
const short MODE = 0;                  // not referenced by main() in this file
char *SERVER_HOST = "192.168.200.20";  // not referenced by main() in this file
char *SERVER_NIC = "192.168.200.21";   // only appears in a commented-out assignment in main()
const int NIC_PORT = 8090;             // copied into base->port by main()
const double err_fraction = 0.5;       // not referenced by main() in this file
const short dev_num = 0;               // passed to open_dev() by main()
/*
 * RDMA test client.
 *
 * Sets up the verbs resources held in a resource_base (device, PD, MR, CQ, QP),
 * connects the queue pair, then sends cache_meta_size objects one at a time and
 * reads a Status back for each. Finally prints how many requests came back with
 * STATUS_WRONG_VERSION, how many did not, and the average round-trip time in ms.
 *
 * Returns 0 on completion, 1 if one of the initial allocations fails.
 */
int main(int argc, char *argv[])
{
    // Scratch buffer handed to sync_remote_qp() as its third argument.
    char *temp = (char *)malloc(128);
    if (temp == NULL) {
        std::cerr << "malloc failed for temp buffer" << std::endl;
        return 1;
    }

    struct resource_base *base = (struct resource_base *)malloc(sizeof(struct resource_base));
    if (base == NULL) {
        std::cerr << "malloc failed for resource_base" << std::endl;
        free(temp);
        return 1;
    }

    init_resources(base);
    //base->server_name = SERVER_NIC;
    base->ib_port = IB_PORT;
    base->gid_idx = GID_IDX;
    base->port = NIC_PORT;

    open_dev(base, dev_num);
    allocate_pd(base);
    register_mr(base);
    init_cq(base);
    init_qp(base);

    struct ibv_port_attr port_attr;
    // The failing call here is ibv_query_port, so report it under that name
    // (the original label said "ibv_query_gid", which pointed at the wrong call).
    if (ibv_query_port(base->ctx, base->ib_port, &port_attr))
        D(err_msg("ibv_query_port", true, base));
    if (port_attr.state != IBV_PORT_ACTIVE)
        D(err_msg("IB PORT NOT ACTIVE", true, base));
    // port_attr lives on main()'s stack, so the stored pointer stays valid
    // for as long as main() is running.
    base->port_attr = &port_attr;

    union ibv_gid my_gid;
    if (ibv_query_gid(base->ctx, base->ib_port, base->gid_idx, &my_gid))
        D(err_msg("ibv_query_gid", true, base));
    memcpy(base->local_conn->gid, &my_gid, 16);

    connect_qp(base);
    strcpy(base->mr_buf_addr, "yo");
    sync_remote_qp(base, "R", temp, 1);
    post_send(base, IBV_WR_RDMA_WRITE);
    //base->mr_buf_addr = (char *) malloc(base->mr_size);
    //strcpy(base->mr_buf_addr, "Hi from client\0");
    //sock_connect(base);
    //sync_remote_qp(base, base->mr_buf_addr, temp, 15);

    union object test_obj;
    union object ret_obj;
    memset(&test_obj, 0, sizeof(test_obj));
    memset(&ret_obj, 0, sizeof(ret_obj));
    test_obj.obj.key = 1;
    test_obj.obj.value[0] = 'T';
    test_obj.obj.version = 1;
    test_obj.obj.status = STATUS_OK;

    struct timeval temp_time;
    double snd_ts, rcvd_ts;
    double avg = 0;    // running sum of round-trip times (ms); divided once at the end
    int err_cnt = 0;
    int succ_cnt = 0;
    // for(int i=0; i<1000; i++) {
    //     gettimeofday(&temp_time, NULL);
    //     //time in ms
    //     snd_ts = ((double)temp_time.tv_sec*1000.0) + ((double)temp_time.tv_usec/1000.0);
    //     test_obj.obj.send_ts = snd_ts;
    //     sync_remote_qp(base, "W", temp, 1);
    //     sync_remote_qp(base, (char *)&test_obj, (char *)&ret_obj, sizeof(test_obj));
    //     gettimeofday(&temp_time, NULL);
    //     rcvd_ts = ((double)temp_time.tv_sec*1000.0) + ((double)temp_time.tv_usec/1000.0);
    //     avg += (rcvd_ts - snd_ts);
    // }

    enum Status ret_status;
    for (int i = 0; i < cache_meta_size; i++) {
        test_obj.obj.key = i;

        // Send timestamp in milliseconds.
        gettimeofday(&temp_time, NULL);
        snd_ts = ((double)temp_time.tv_sec * 1000.0) + ((double)temp_time.tv_usec / 1000.0);
        test_obj.obj.send_ts = snd_ts;

        //send_and_check(base, (char *)&test_obj, (char *)&ret_status, sizeof(test_obj), sizeof(ret_status));
        memcpy((void *)base->mr_buf_addr, (void *)&test_obj, sizeof(test_obj));
        send_obj(base, "R", 1);
        sync_remote_qp(base, "T", temp, 1);
        read_obj(base, (char *)&ret_status, sizeof(ret_status));

        // Receive timestamp in milliseconds.
        gettimeofday(&temp_time, NULL);
        rcvd_ts = ((double)temp_time.tv_sec * 1000.0) + ((double)temp_time.tv_usec / 1000.0);
        avg += (rcvd_ts - snd_ts);

        if (ret_status == STATUS_WRONG_VERSION)
            err_cnt++;
        else
            succ_cnt++;
    }

    std::cout << "Errored requests: " << err_cnt << std::endl;
    std::cout << "Successful requests: " << succ_cnt << std::endl;
    std::cout << "Avg. RTT: " << avg / (double)cache_meta_size << " ms" << std::endl;
    // if(ret_obj.obj.status == STATUS_WRONG_VERSION)
    //     std::cout<<"Returned with wrong version status"<<std::endl;

    cleanup(base);
    free(temp);    // temp is owned by main(); nothing else releases it
    return 0;
}
Smit_MTP_RamCloud_Replication_Offload/config/client_config.conf
0 → 100644
View file @
d2d47b86
DEBUG
=
TRUE
ANALYZE
=
TRUE
INTERACTIVE_MODE
=
FALSE
TRANSPORT_TYPE
=
RDMA_RC
NUM_THREADS
=
0
CONN_PORT
=
8888
MAX_PACKET_SIZE_MBYTES
=
4
;
RDMA_MR_SIZE_MBYTES
=
1
RDMA_MTU_SIZE_BYTES
=
512
RDMA_MIN_RNR_TIMER
=
12
RDMA_TIMEOUT
=
12
RDMA_CQ_POLL_TIMEOUT_MS
=
5
RDMA_RETRY_CNT
=
4
RDMA_IB_PORT
=
1
RDMA_GID_IDX
=
1
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/config/config_parameters.hpp
0 → 100644
View file @
d2d47b86
#ifndef __CONFIG_PARAMETERS_H__
#define __CONFIG_PARAMETERS_H__
#include <unordered_map>
#include <string>
#include "../transport_api/transport_config.hpp"
// One identifier per configuration key. The enumerators are consecutive
// starting at 0, in the same order as the param_strs name table.
enum params {
    DEBUG = 0,
    ANALYZE,
    INTERACTIVE_MODE,
    TRANSPORT_TYPE,
    NUM_THREADS,
    CONN_PORT,
    NUM_REPLICAS,
    MAX_PACKET_SIZE_BYTES,
    MAX_PACKET_SIZE_MBYTES,
    RDMA_MR_SIZE_BYTES,
    RDMA_MR_SIZE_MBYTES,       //max 8
    RDMA_MTU_SIZE_BYTES,       //one of 256,512,1024,2048,4096
    RDMA_MIN_RNR_TIMER,
    RDMA_TIMEOUT,
    RDMA_CQ_POLL_TIMEOUT_MS,
    RDMA_RETRY_CNT,
    RDMA_IB_PORT,
    RDMA_GID_IDX,
    ARRIVAL_RATE
};
// Textual name of each configuration key, indexed by the matching `params`
// enumerator. Entry 17 must read "RDMA_GID_IDX" to agree with the enumerator
// of that name (it previously read "RDMA_GID_PORT").
const std::string param_strs[] = {
    "DEBUG",
    "ANALYZE",
    "INTERACTIVE_MODE",
    "TRANSPORT_TYPE",
    "NUM_THREADS",
    "CONN_PORT",
    "NUM_REPLICAS",
    "MAX_PACKET_SIZE_BYTES",
    "MAX_PACKET_SIZE_MBYTES",
    "RDMA_MR_SIZE_BYTES",
    "RDMA_MR_SIZE_MBYTES",
    "RDMA_MTU_SIZE_BYTES",
    "RDMA_MIN_RNR_TIMER",
    "RDMA_TIMEOUT",
    "RDMA_CQ_POLL_TIMEOUT_MS",
    "RDMA_RETRY_CNT",
    "RDMA_IB_PORT",
    "RDMA_GID_IDX",
    "ARRIVAL_RATE"
};
// Number of configuration keys: the `params` enum and the param_strs table
// both hold 19 entries (DEBUG .. ARRIVAL_RATE), so the count must be 19.
const int num_params = 19;
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/config/master_server_config.conf
0 → 100644
View file @
d2d47b86
Smit_MTP_RamCloud_Replication_Offload/config/nic_config.conf
0 → 100644
View file @
d2d47b86
DEBUG
=
TRUE
ANALYZE
=
TRUE
TRANSPORT_TYPE
=
RDMA_RC
NUM_THREADS
=
3
CONN_PORT
=
8888
MAX_PACKET_SIZE_MBYTES
=
4
;
RDMA_MR_SIZE_MBYTES
=
1
RDMA_MTU_SIZE_BYTES
=
512
RDMA_MIN_RNR_TIMER
=
12
RDMA_TIMEOUT
=
12
RDMA_CQ_POLL_TIMEOUT_MS
=
5
RDMA_RETRY_CNT
=
4
RDMA_IB_PORT
=
1
RDMA_GID_IDX
=
1
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/config/read_config.cc
0 → 100644
View file @
d2d47b86
#ifndef __READ_CONFIG_CC__
#define __READ_CONFIG_CC__
#include <chrono>
#include <iostream>
#include <string>
#include <iterator>
#include <vector>
#include <fstream>
#include <infiniband/verbs.h>
#include "../transport_api/transport_config.hpp"
#include "read_config.hpp"
// Maps the TRANSPORT_TYPE value written in a config file to its enum constant.
std::unordered_map<std::string, enum Transport_Type> transport_type_map = {
    { "TCP",     TCP_IP_TRANSPORT  },
    { "UDP",     UDP_TRANSPORT     },
    { "RDMA_RC", RDMA_RC_TRANSPORT },
    { "RDMA_UC", RDMA_UC_TRANSPORT }
};
// Maps a configuration key, as spelled in the config file, to its `params`
// enumerator. fill_params() ignores any key that is not listed here.
std::unordered_map<std::string, enum params> param_map = {
    { "DEBUG",                   DEBUG                   },
    { "ANALYZE",                 ANALYZE                 },
    { "INTERACTIVE_MODE",        INTERACTIVE_MODE        },
    { "TRANSPORT_TYPE",          TRANSPORT_TYPE          },
    { "NUM_THREADS",             NUM_THREADS             },
    { "CONN_PORT",               CONN_PORT               },
    { "NUM_REPLICAS",            NUM_REPLICAS            },
    { "MAX_PACKET_SIZE_BYTES",   MAX_PACKET_SIZE_BYTES   },
    { "MAX_PACKET_SIZE_MBYTES",  MAX_PACKET_SIZE_MBYTES  },
    { "RDMA_MR_SIZE_BYTES",      RDMA_MR_SIZE_BYTES      },
    { "RDMA_MR_SIZE_MBYTES",     RDMA_MR_SIZE_MBYTES     },
    { "RDMA_MTU_SIZE_BYTES",     RDMA_MTU_SIZE_BYTES     },
    { "RDMA_MIN_RNR_TIMER",      RDMA_MIN_RNR_TIMER      },
    { "RDMA_TIMEOUT",            RDMA_TIMEOUT            },
    { "RDMA_CQ_POLL_TIMEOUT_MS", RDMA_CQ_POLL_TIMEOUT_MS },
    { "RDMA_RETRY_CNT",          RDMA_RETRY_CNT          },
    { "RDMA_IB_PORT",            RDMA_IB_PORT            },
    { "RDMA_GID_IDX",            RDMA_GID_IDX            },
    { "ARRIVAL_RATE",            ARRIVAL_RATE            }
};
// Returns the first whitespace-delimited token of `str`: leading blanks are
// skipped and the result ends just before the next blank (or at end of string).
// An empty or all-blank input yields an empty string.
//
// Tabs, CR, LF, VT and FF count as blanks in addition to ' ', so a value read
// from a CRLF-terminated or tab-indented config line ("TRUE\r", "\tTCP") still
// compares equal to the expected keyword.
std::string strip_whitespaces(std::string str)
{
    static const char blanks[] = " \t\r\n\v\f";

    const std::string::size_type first = str.find_first_not_of(blanks);
    if (first == std::string::npos)
        return std::string();

    const std::string::size_type stop = str.find_first_of(blanks, first);
    if (stop == std::string::npos)
        return str.substr(first);
    return str.substr(first, stop - first);
}
// Default constructor. Delegates to the filename constructor with an empty
// filename so that every field starts from the same documented defaults
// instead of being left uninitialised (reading them, e.g. via print_vals(),
// would otherwise be undefined behaviour).
Params::Params() : Params(std::string())
{
}
Params
::
Params
(
std
::
string
f
)
{
this
->
filename
=
f
;
this
->
debug
=
false
;
this
->
analyze
=
false
;
this
->
interactive_mode
=
true
;
this
->
transport_type
=
TCP_IP_TRANSPORT
;
this
->
num_threads
=
0
;
this
->
conn_port
=
8080
;
this
->
num_replicas
=
0
;
this
->
max_packet_size_bytes
=
0
;
this
->
rdma_mr_size_bytes
=
0
;
this
->
rdma_mtu_size_bytes
=
IBV_MTU_512
;
this
->
rdma_min_rnr_timer
=
0
;
this
->
rdma_timeout
=
0
;
this
->
rdma_cq_poll_timeout_ms
=
std
::
chrono
::
duration
<
double
>
{
0.0
};
this
->
rdma_retry_cnt
=
0
;
this
->
rdma_ib_port
=
0
;
this
->
rdma_gid_idx
=
0
;
this
->
arrival_rate
=
0.0
;
}
//debug functions
// Debug helper: prints every raw "key : value" pair read from the config
// file, one per line, in the map's iteration order.
void Params::print_map()
{
    for (auto it = this->param_val_map.begin(); it != this->param_val_map.end(); ++it)
        std::cout << it->first << " : " << it->second << std::endl;
}
// Debug helper: prints the current value of every parsed parameter to stdout,
// one per line. Compared with the earlier version it also prints
// rdma_retry_cnt (previously omitted) and prints "UNKNOWN" for a transport or
// MTU value that matches no known constant, so the line is always terminated.
void Params::print_vals()
{
    std::cout << "DEBUG: " << (this->debug ? "TRUE" : "FALSE") << std::endl;
    std::cout << "ANALYZE: " << (this->analyze ? "TRUE" : "FALSE") << std::endl;
    std::cout << "MODE: " << (this->interactive_mode ? "INTERACTIVE MODE" : "BATCH MODE") << std::endl;

    std::cout << "Transport: ";
    switch (this->transport_type) {
    case TCP_IP_TRANSPORT:
        std::cout << "TCP" << std::endl;
        break;
    case UDP_TRANSPORT:
        std::cout << "UDP" << std::endl;
        break;
    case RDMA_RC_TRANSPORT:
        std::cout << "RDMA RC" << std::endl;
        break;
    case RDMA_UC_TRANSPORT:
        std::cout << "RDMA UC" << std::endl;
        break;
    default:
        std::cout << "UNKNOWN" << std::endl;
        break;
    }

    std::cout << "Num thread: " << this->num_threads << std::endl;
    std::cout << "Conn Port: " << this->conn_port << std::endl;
    std::cout << "Num Replicas: " << this->num_replicas << std::endl;
    std::cout << "Max Packet Size (Bytes): " << this->max_packet_size_bytes << std::endl;
    std::cout << "RDMA MR size (Bytes): " << this->rdma_mr_size_bytes << std::endl;

    std::cout << "RDMA MTU size (Bytes): ";
    switch (this->rdma_mtu_size_bytes) {
    case IBV_MTU_256:
        std::cout << "256B" << std::endl;
        break;
    case IBV_MTU_512:
        std::cout << "512B" << std::endl;
        break;
    case IBV_MTU_1024:
        std::cout << "1024B" << std::endl;
        break;
    case IBV_MTU_2048:
        std::cout << "2048B" << std::endl;
        break;
    case IBV_MTU_4096:
        std::cout << "4096B" << std::endl;
        break;
    default:
        std::cout << "UNKNOWN" << std::endl;
        break;
    }

    std::cout << "RDMA min rnr timer: " << this->rdma_min_rnr_timer << std::endl;
    std::cout << "RDMA timeout: " << this->rdma_timeout << std::endl;
    // The member is a std::chrono::duration<double>, so count() is in seconds
    // even though the member name ends in _ms.
    std::cout << "RDMA CQ poll timeout: " << this->rdma_cq_poll_timeout_ms.count() << std::endl;
    std::cout << "RDMA retry cnt: " << this->rdma_retry_cnt << std::endl;
    std::cout << "RDMA IB Port: " << this->rdma_ib_port << std::endl;
    std::cout << "RDMA GID IDX: " << this->rdma_gid_idx << std::endl;
    std::cout << "ARRIVAL RATE: " << this->arrival_rate << " requests/second" << std::endl;
}
// Reads the config file named by `filename` into param_val_map.
//
// Each line containing '=' is split at the first '=': the text before it is
// the key and the text after it is the value, both stored verbatim (blanks are
// trimmed later by fill_params()). Empty lines, lines starting with "//" and
// lines without '=' are skipped. If the file cannot be opened, a message is
// written to stderr and the map is left untouched.
void Params::read_config_file()
{
    std::ifstream config_file(this->filename);
    if (!config_file.is_open()) {
        std::cerr << "Could not open config file: " << this->filename << std::endl;
        return;
    }

    std::string line;
    while (std::getline(config_file, line)) {
        // getline() strips the '\n', so a blank line arrives as an empty string.
        if (line.empty() || line.compare(0, 2, "//") == 0)
            continue;

        // size_type (not int) so the comparison with npos is exact.
        const std::string::size_type pos = line.find('=');
        if (pos == std::string::npos)
            continue;

        this->param_val_map[line.substr(0, pos)] = line.substr(pos + 1);
    }
}
// Fills parameter variables
// using internal parameter map
void
Params
::
fill_params
()
{
std
::
string
t1
,
t2
;
double
tmp
;
enum
params
param_type
;
if
(
this
->
param_val_map
.
empty
())
{
//error
return
;
}
for
(
auto
p
:
this
->
param_val_map
)
{
t1
=
strip_whitespaces
(
p
.
first
);
t2
=
strip_whitespaces
(
p
.
second
);
if
(
param_map
.
count
(
t1
)
==
0
)
continue
;
param_type
=
param_map
[
t1
];
switch
(
param_type
)
{
case
DEBUG
:
if
(
t2
.
compare
(
"TRUE"
)
==
0
)
{
this
->
debug
=
true
;
}
else
{
this
->
debug
=
false
;
}
break
;
case
ANALYZE
:
if
(
t2
.
compare
(
"TRUE"
)
==
0
)
{
this
->
analyze
=
true
;
}
else
{
this
->
analyze
=
false
;
}
break
;
case
INTERACTIVE_MODE
:
if
(
t2
.
compare
(
"TRUE"
)
==
0
)
{
this
->
interactive_mode
=
true
;
}
else
{
this
->
interactive_mode
=
false
;
}
case
TRANSPORT_TYPE
:
this
->
transport_type
=
transport_type_map
[
t2
];
break
;
case
NUM_THREADS
:
this
->
num_threads
=
std
::
stoi
(
t2
);
break
;
case
CONN_PORT
:
this
->
conn_port
=
std
::
stoi
(
t2
);
break
;
case
NUM_REPLICAS
:
this
->
num_replicas
=
std
::
stoi
(
t2
);
break
;
case
MAX_PACKET_SIZE_BYTES
:
this
->
max_packet_size_bytes
=
std
::
stoi
(
t2
);
break
;
case
MAX_PACKET_SIZE_MBYTES
:
this
->
max_packet_size_bytes
=
(
int
)
(
std
::
stoi
(
t2
)
*
(
1
<<
20
));
break
;
case
RDMA_MR_SIZE_BYTES
:
this
->
rdma_mr_size_bytes
=
(
size_t
)
(
std
::
stoi
(
t2
));
break
;
case
RDMA_MR_SIZE_MBYTES
:
this
->
rdma_mr_size_bytes
=
(
size_t
)
(
std
::
stoi
(
t2
)
*
(
1
<<
20
));
break
;
case
RDMA_MTU_SIZE_BYTES
:
switch
(
std
::
stoi
(
t2
))
{
case
256
:
this
->
rdma_mtu_size_bytes
=
IBV_MTU_256
;
break
;
case
512
:
this
->
rdma_mtu_size_bytes
=
IBV_MTU_512
;
break
;
case
1024
:
this
->
rdma_mtu_size_bytes
=
IBV_MTU_1024
;
break
;
case
2048
:
this
->
rdma_mtu_size_bytes
=
IBV_MTU_2048
;
break
;
case
4096
:
this
->
rdma_mtu_size_bytes
=
IBV_MTU_4096
;
break
;
default:
//error
break
;
}
break
;
case
RDMA_MIN_RNR_TIMER
:
this
->
rdma_min_rnr_timer
=
std
::
stoi
(
t2
);
break
;
case
RDMA_TIMEOUT
:
this
->
rdma_timeout
=
std
::
stoi
(
t2
);
break
;
case
RDMA_CQ_POLL_TIMEOUT_MS
:
tmp
=
std
::
stod
(
t2
);
tmp
*=
1e-3
;
//convert to ms
this
->
rdma_cq_poll_timeout_ms
=
std
::
chrono
::
duration
<
double
>
{
tmp
};
break
;
case
RDMA_RETRY_CNT
:
this
->
rdma_retry_cnt
=
std
::
stoi
(
t2
);
break
;
case
RDMA_IB_PORT
:
this
->
rdma_ib_port
=
std
::
stoi
(
t2
);
break
;
case
RDMA_GID_IDX
:
this
->
rdma_gid_idx
=
std
::
stoi
(
t2
);
break
;
case
ARRIVAL_RATE
:
this
->
arrival_rate
=
std
::
stod
(
t2
);
default:
//error
break
;
}
}
return
;
}
// Reads config file and
// Fills parameter variables
void
Params
::
read_params
()
{
this
->
read_config_file
();
this
->
fill_params
();
return
;
}
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/config/read_config.hpp
0 → 100644
View file @
d2d47b86
#ifndef __READ_CONFIG_H__
#define __READ_CONFIG_H__
#include <chrono>
#include <string>
#include <vector>
#include <unordered_map>
#include <infiniband/verbs.h>
#include "config_parameters.hpp"
#include "../transport_api/transport_config.hpp"
// Holds the settings parsed from a "KEY = VALUE" configuration file.
// Typical use: construct with the file name, call read_params(), then read
// the public members.
class Params
{
private:
    std::string filename;                                        // path given to the constructor
    std::vector<std::string> param_lines;
    std::unordered_map<std::string, std::string> param_val_map;  // raw key -> raw value, as read

public:
    // Parsed values; the filename constructor sets the defaults.
    bool debug;
    bool analyze;
    bool interactive_mode;
    enum Transport_Type transport_type;
    int num_threads;
    int conn_port;
    int num_replicas;
    int max_packet_size_bytes;
    size_t rdma_mr_size_bytes;
    enum ibv_mtu rdma_mtu_size_bytes;
    int rdma_min_rnr_timer;
    int rdma_timeout;
    // duration<double> counts seconds despite the _ms suffix in the name.
    std::chrono::duration<double> rdma_cq_poll_timeout_ms;
    int rdma_retry_cnt;
    int rdma_ib_port;
    int rdma_gid_idx;
    double arrival_rate;

    Params();
    Params(std::string f);

    //debug functions
    void print_map();
    void print_vals();

    //required functions
    void read_params();        // read_config_file() followed by fill_params()
    void read_config_file();   // file -> param_val_map
    void fill_params();        // param_val_map -> typed members
};
// Free-function prototypes.
// NOTE(review): read_config.cc does not define either of these; confirm they
// are defined elsewhere, or that they are leftovers.
std::vector<std::string> get_param_lines(char *filename);
enum params check_token(std::string token);
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/config/read_configTest.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include "read_config.hpp"
using
namespace
std
;
int
main
()
{
Params
p
(
"config.conf"
);
p
.
read_params
();
p
.
print_vals
();
return
0
;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/config/server_config.conf
0 → 100644
View file @
d2d47b86
DEBUG
=
TRUE
ANALYZE
=
TRUE
TRANSPORT_TYPE
=
RDMA_RC
NUM_THREADS
=
3
CONN_PORT
=
8888
MAX_PACKET_SIZE_MBYTES
=
4
;
RDMA_MR_SIZE_MBYTES
=
1
RDMA_MTU_SIZE_BYTES
=
512
RDMA_MIN_RNR_TIMER
=
12
RDMA_TIMEOUT
=
12
RDMA_CQ_POLL_TIMEOUT_MS
=
5
RDMA_RETRY_CNT
=
4
RDMA_IB_PORT
=
1
RDMA_GID_IDX
=
1
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/config/threaded_client.conf
0 → 100644
View file @
d2d47b86
DEBUG
=
TRUE
ANALYZE
=
TRUE
INTERACTIVE_MODE
=
FALSE
TRANSPORT_TYPE
=
RDMA_RC
NUM_THREADS
=
0
CONN_PORT
=
8888
MAX_PACKET_SIZE_MBYTES
=
4
;
RDMA_MR_SIZE_MBYTES
=
1
RDMA_MTU_SIZE_BYTES
=
512
RDMA_MIN_RNR_TIMER
=
12
RDMA_TIMEOUT
=
12
RDMA_CQ_POLL_TIMEOUT_MS
=
5
RDMA_RETRY_CNT
=
4
RDMA_IB_PORT
=
1
RDMA_GID_IDX
=
1
ARRIVAL_RATE
=
80
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/experiments/client.cpp
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <stdio.h>
#include <vector>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string.h>
using
namespace
std
;
// Endpoint addresses and ports used by the experiment programs.
// The addresses are writable arrays rather than `char *` pointers to string
// literals (a conversion ISO C++11 forbids); an array still decays to
// `char *` when passed to sock_connect().
char NIC_IP[] = "192.168.200.21";
char SERVER_IP[] = "192.168.200.20";
char CLIENT_IP[] = "192.168.200.40";
int SERVER_PORT = 8989;
int CLIENT_PORT = 9898;
/**
 * Write exactly `size` bytes from `obj` to descriptor `cfd`.
 *
 * write() may accept fewer bytes than requested, so the call is repeated
 * from the first unsent byte until everything has been handed to the kernel
 * instead of reporting a short write as a failure.
 *
 * @param cfd   descriptor to write to
 * @param obj   first byte to send
 * @param size  number of bytes to send
 * @return 0 once all bytes are written; -1 (after printing "write err") if
 *         write() fails or makes no progress
 */
int send_obj(int cfd, char *obj, int size)
{
    int sent = 0;
    while (sent < size) {
        ssize_t n = write(cfd, obj + sent, size - sent);
        if (n <= 0) {
            std::cout << "write err" << std::endl;
            return -1;
        }
        sent += (int)n;
    }
    return 0;
}
/**
 * Read exactly `size` bytes from descriptor `cfd` into `obj`.
 *
 * read() on a stream may return fewer bytes than requested. Each retry
 * therefore continues at the first unfilled byte and asks only for what is
 * still missing, so earlier data is never overwritten and the buffer is
 * never overrun.
 *
 * @param cfd   descriptor to read from
 * @param obj   destination buffer of at least `size` bytes
 * @param size  number of bytes to read
 * @return 0 when `size` bytes were read; -1 (after printing "read err") on
 *         end-of-file or a read() error before that
 */
int read_obj(int cfd, char *obj, int size)
{
    int tot_bytes = 0;
    while (tot_bytes < size) {
        ssize_t n = read(cfd, obj + tot_bytes, size - tot_bytes);
        if (n <= 0)
            break;              // 0 = peer closed, <0 = error
        tot_bytes += (int)n;
    }
    if (tot_bytes < size) {
        std::cout << "read err" << std::endl;
        return -1;
    }
    return 0;
}
/**
 * Create a TCP socket and either accept one connection or connect out.
 *
 * Server mode (`server_name == NULL`): bind to INADDR_ANY:`port`, listen and
 * accept a single connection. `*local_fd` receives the listening socket and
 * `*conn_fd` the accepted one.
 *
 * Client mode: connect to the dotted-quad address `server_name` on `port`.
 * `*conn_fd` receives the connected socket; `*local_fd` (if non-NULL) is set
 * to -1 so callers that close it unconditionally do not close a stray fd.
 *
 * @return 0 on success; -1 after printing the name of the failing step, with
 *         the socket closed.
 */
int sock_connect(char *server_name, int port, int *local_fd, int *conn_fd)
{
    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    int sfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sfd < 0) {
        std::cout << "sfd:socket" << std::endl;
        return -1;
    }

    if (server_name == NULL) {
        if (bind(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
            close(sfd);
            std::cout << "bind" << std::endl;
            return -1;          // do not listen/accept on a closed socket
        }
        if (listen(sfd, 1) < 0) {
            close(sfd);
            std::cout << "listen" << std::endl;
            return -1;
        }
        int cfd = accept(sfd, NULL, NULL);
        if (cfd < 0) {
            close(sfd);
            std::cout << "accept" << std::endl;
            return -1;
        }
        *local_fd = sfd;
        *conn_fd = cfd;
        return 0;
    }

    // inet_aton() returns 0 for a malformed address; without this check the
    // connect below would silently target INADDR_ANY.
    if (inet_aton(server_name, &host_addr.sin_addr) == 0) {
        close(sfd);
        std::cout << "inet_aton" << std::endl;
        return -1;
    }
    if (connect(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        close(sfd);
        std::cout << "connect" << std::endl;
        return -1;
    }
    if (local_fd != NULL)
        *local_fd = -1;         // no listening socket in client mode
    *conn_fd = sfd;
    return 0;
}
// Fixed-size record exchanged over the sockets as raw bytes
// (main() sends sizeof(struct dummy) bytes per request).
struct dummy {
    long long key;      // request sequence number
    bool      valid;    // accept/reject flag
    char      val[100]; // payload bytes
};
long
long
arr_size
=
900000
;
/**
 * Fill `arr[0 .. arr_size-1]`: each element's key is its index and its
 * payload is the NUL-terminated string "HEY". The `valid` flag is not
 * touched.
 *
 * The index is `long long` like `arr_size`, so counts above INT_MAX do not
 * overflow the loop counter.
 */
void populate_objs(dummy arr[], long long arr_size)
{
    for (long long i = 0; i < arr_size; i++) {
        arr[i].key = i;
        arr[i].val[0] = 'H';
        arr[i].val[1] = 'E';
        arr[i].val[2] = 'Y';
        arr[i].val[3] = '\0';
    }
}
/**
 * Closed-loop benchmark client.
 *
 * Connects to NIC_IP:CLIENT_PORT, sends arr_size `dummy` requests one at a
 * time, waits for a one-byte reply to each ('S' counts as success, anything
 * else as error) and prints the counts, average round-trip time and
 * throughput.
 *
 * Returns 1 if the connection cannot be established. The request loop stops
 * at the first send/receive failure instead of retrying on a dead socket.
 */
int main()
{
    int nic_sfd = -1, nic_cfd = -1;
    if (sock_connect(NIC_IP, CLIENT_PORT, &nic_sfd, &nic_cfd) < 0 || nic_cfd < 0)
        return 1;

    struct timeval t_time;
    double s1_time, s2_time, e_time;
    double avg_time = 0.0;
    int succ_cnt = 0;
    int err_cnt = 0;
    char reply = 0;                     // one-byte status from the peer
    int obj_size = sizeof(struct dummy);
    dummy obj;
    memset(&obj, 0, sizeof(obj));       // never send indeterminate bytes

    gettimeofday(&t_time, NULL);
    s2_time = ((double)t_time.tv_sec * 1000.0) + ((double)t_time.tv_usec / 1000.0);

    for (long long i = 0; i < arr_size; i++) {
        gettimeofday(&t_time, NULL);
        s1_time = ((double)t_time.tv_sec * 1000.0) + ((double)t_time.tv_usec / 1000.0);

        obj.key = i;
        if (send_obj(nic_cfd, (char *)&obj, obj_size) < 0 ||
            read_obj(nic_cfd, &reply, 1) < 0)
            break;

        gettimeofday(&t_time, NULL);
        e_time = ((double)t_time.tv_sec * 1000.0) + ((double)t_time.tv_usec / 1000.0);
        avg_time += (e_time - s1_time);

        if (reply == 'S')
            succ_cnt++;
        else
            err_cnt++;
    }

    gettimeofday(&t_time, NULL);
    e_time = ((double)t_time.tv_sec * 1000.0) + ((double)t_time.tv_usec / 1000.0);
    double overall_time = (e_time - s2_time) / 1000.0;   // seconds

    int completed = succ_cnt + err_cnt;
    if (completed > 0)
        avg_time = avg_time / completed;                 // milliseconds

    std::cout << "Success: " << succ_cnt << std::endl;
    std::cout << "Errored: " << err_cnt << std::endl;
    std::cout << "Avg. RTT: " << avg_time << " ms" << std::endl;
    std::cout << "Overall Time: " << overall_time << std::endl;
    std::cout << "Througput: " << completed / overall_time << std::endl;
    std::cout << "Closing connections" << std::endl;

    close(nic_cfd);
    if (nic_sfd >= 0)
        close(nic_sfd);
    return 0;
}
Smit_MTP_RamCloud_Replication_Offload/experiments/non_offloaded.cpp
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <stdio.h>
#include <vector>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string.h>
using
namespace
std
;
// Endpoint addresses and ports used by the experiment programs.
// The addresses are writable arrays rather than `char *` pointers to string
// literals (a conversion ISO C++11 forbids); an array still decays to
// `char *` when passed to sock_connect().
char NIC_IP[] = "192.168.200.21";
char SERVER_IP[] = "192.168.200.20";
char CLIENT_IP[] = "192.168.200.40";
int SERVER_PORT = 8989;
int CLIENT_PORT = 9898;
/**
 * Write exactly `size` bytes from `obj` to descriptor `cfd`.
 *
 * write() may accept fewer bytes than requested, so the call is repeated
 * from the first unsent byte until everything has been handed to the kernel
 * instead of reporting a short write as a failure.
 *
 * @param cfd   descriptor to write to
 * @param obj   first byte to send
 * @param size  number of bytes to send
 * @return 0 once all bytes are written; -1 (after printing "write err") if
 *         write() fails or makes no progress
 */
int send_obj(int cfd, char *obj, int size)
{
    int sent = 0;
    while (sent < size) {
        ssize_t n = write(cfd, obj + sent, size - sent);
        if (n <= 0) {
            std::cout << "write err" << std::endl;
            return -1;
        }
        sent += (int)n;
    }
    return 0;
}
/**
 * Read exactly `size` bytes from descriptor `cfd` into `obj`.
 *
 * read() on a stream may return fewer bytes than requested. Each retry
 * therefore continues at the first unfilled byte and asks only for what is
 * still missing, so earlier data is never overwritten and the buffer is
 * never overrun.
 *
 * @param cfd   descriptor to read from
 * @param obj   destination buffer of at least `size` bytes
 * @param size  number of bytes to read
 * @return 0 when `size` bytes were read; -1 (after printing "read err") on
 *         end-of-file or a read() error before that
 */
int read_obj(int cfd, char *obj, int size)
{
    int tot_bytes = 0;
    while (tot_bytes < size) {
        ssize_t n = read(cfd, obj + tot_bytes, size - tot_bytes);
        if (n <= 0)
            break;              // 0 = peer closed, <0 = error
        tot_bytes += (int)n;
    }
    if (tot_bytes < size) {
        std::cout << "read err" << std::endl;
        return -1;
    }
    return 0;
}
/**
 * Create a TCP socket and either accept one connection or connect out.
 *
 * Server mode (`server_name == NULL`): bind to INADDR_ANY:`port`, listen and
 * accept a single connection. `*local_fd` receives the listening socket and
 * `*conn_fd` the accepted one.
 *
 * Client mode: connect to the dotted-quad address `server_name` on `port`.
 * `*conn_fd` receives the connected socket; `*local_fd` (if non-NULL) is set
 * to -1 so callers that close it unconditionally do not close a stray fd.
 *
 * @return 0 on success; -1 after printing the name of the failing step, with
 *         the socket closed.
 */
int sock_connect(char *server_name, int port, int *local_fd, int *conn_fd)
{
    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    int sfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sfd < 0) {
        std::cout << "sfd:socket" << std::endl;
        return -1;
    }

    if (server_name == NULL) {
        if (bind(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
            close(sfd);
            std::cout << "bind" << std::endl;
            return -1;          // do not listen/accept on a closed socket
        }
        if (listen(sfd, 1) < 0) {
            close(sfd);
            std::cout << "listen" << std::endl;
            return -1;
        }
        int cfd = accept(sfd, NULL, NULL);
        if (cfd < 0) {
            close(sfd);
            std::cout << "accept" << std::endl;
            return -1;
        }
        *local_fd = sfd;
        *conn_fd = cfd;
        return 0;
    }

    // inet_aton() returns 0 for a malformed address; without this check the
    // connect below would silently target INADDR_ANY.
    if (inet_aton(server_name, &host_addr.sin_addr) == 0) {
        close(sfd);
        std::cout << "inet_aton" << std::endl;
        return -1;
    }
    if (connect(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        close(sfd);
        std::cout << "connect" << std::endl;
        return -1;
    }
    if (local_fd != NULL)
        *local_fd = -1;         // no listening socket in client mode
    *conn_fd = sfd;
    return 0;
}
// Fixed-size record exchanged over the sockets as raw bytes
// (main() reads sizeof(obj) bytes per request).
struct dummy {
    long long key;      // request identifier
    bool      valid;    // accept/reject flag, overwritten by main()
    char      val[100]; // payload bytes
};
/**
 * Initialise `arr[0 .. arr_size-1]`: key = index, and `valid` decided by one
 * rand() draw per element (a draw with rand()/RAND_MAX at or below
 * `err_fraction` marks the element invalid).
 */
void populate_objs(dummy arr[], long long arr_size, double err_fraction)
{
    long long idx = 0;
    while (idx < arr_size) {
        dummy &slot = arr[idx];
        slot.key = idx;
        slot.valid = !((double)rand() / RAND_MAX <= err_fraction);
        ++idx;
    }
}
// Stand-in for per-request work: counts a local variable up to 1000.
// NOTE(review): the result is unused, so an optimising compiler may remove
// the loop entirely — confirm the build flags if the delay matters.
void dummy_function()
{
    int counter = 0;
    int step = 0;
    while (step < 1000) {
        counter++;
        step++;
    }
}
double
err_fraction
=
0.0
;
long
long
arr_size
=
1000000
;
/**
 * Non-offloaded server: accepts one connection on SERVER_PORT, then for up
 * to arr_size requests reads a `dummy`, decides validity with a rand() draw
 * against err_fraction and answers 'S' (after dummy_function()) or 'E'.
 * Prints the achieved throughput at the end.
 *
 * Returns 1 if no connection could be accepted. The loop stops at the first
 * read/write failure, and both sockets are closed on exit.
 */
int main()
{
    int client_sfd = -1, client_cfd = -1;
    if (sock_connect(NULL, SERVER_PORT, &client_sfd, &client_cfd) < 0 || client_cfd < 0)
        return 1;

    dummy obj;
    memset(&obj, 0, sizeof(obj));
    int obj_size = sizeof(obj);
    char s[] = "S";
    char e[] = "E";
    int succ_cnt = 0;
    int err_cnt = 0;
    struct timeval t_time;
    double s_time, e_time, overall_time;

    gettimeofday(&t_time, NULL);
    s_time = ((double)t_time.tv_sec * 1000.0) + ((double)t_time.tv_usec / 1000.0);

    for (long long i = 0; i < arr_size; i++) {
        if (read_obj(client_cfd, (char *)&obj, obj_size) < 0)
            break;

        if ((double)rand() / RAND_MAX <= err_fraction)
            obj.valid = false;
        else
            obj.valid = true;

        if (obj.valid) {
            succ_cnt++;
            dummy_function();
            if (send_obj(client_cfd, s, 1) < 0)
                break;
        } else {
            err_cnt++;
            if (send_obj(client_cfd, e, 1) < 0)
                break;
        }
    }

    gettimeofday(&t_time, NULL);
    e_time = ((double)t_time.tv_sec * 1000.0) + ((double)t_time.tv_usec / 1000.0);
    overall_time = (e_time - s_time) / 1000.0;           // seconds

    std::cout << "Throughput: " << (succ_cnt + err_cnt) / overall_time << std::endl;
    std::cout << "Closing connections" << std::endl;

    close(client_cfd);
    close(client_sfd);
    return 0;
}
Smit_MTP_RamCloud_Replication_Offload/experiments/nonoffloaded_reject.cpp
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <stdio.h>
#include <vector>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string.h>
using
namespace
std
;
// Endpoint addresses and ports used by the experiment programs.
// The addresses are writable arrays rather than `char *` pointers to string
// literals (a conversion ISO C++11 forbids); an array still decays to
// `char *` when passed to sock_connect().
char NIC_IP[] = "192.168.200.21";
char SERVER_IP[] = "192.168.200.20";
char CLIENT_IP[] = "192.168.200.40";
int SERVER_PORT = 8989;
int CLIENT_PORT = 9898;
/**
 * Write exactly `size` bytes from `obj` to descriptor `cfd`.
 *
 * write() may accept fewer bytes than requested, so the call is repeated
 * from the first unsent byte until everything has been handed to the kernel
 * instead of reporting a short write as a failure.
 *
 * @param cfd   descriptor to write to
 * @param obj   first byte to send
 * @param size  number of bytes to send
 * @return 0 once all bytes are written; -1 (after printing "write err") if
 *         write() fails or makes no progress
 */
int send_obj(int cfd, char *obj, int size)
{
    int sent = 0;
    while (sent < size) {
        ssize_t n = write(cfd, obj + sent, size - sent);
        if (n <= 0) {
            std::cout << "write err" << std::endl;
            return -1;
        }
        sent += (int)n;
    }
    return 0;
}
/**
 * Read exactly `size` bytes from descriptor `cfd` into `obj`.
 *
 * read() on a stream may return fewer bytes than requested. Each retry
 * therefore continues at the first unfilled byte and asks only for what is
 * still missing, so earlier data is never overwritten and the buffer is
 * never overrun.
 *
 * @param cfd   descriptor to read from
 * @param obj   destination buffer of at least `size` bytes
 * @param size  number of bytes to read
 * @return 0 when `size` bytes were read; -1 (after printing "read err") on
 *         end-of-file or a read() error before that
 */
int read_obj(int cfd, char *obj, int size)
{
    int tot_bytes = 0;
    while (tot_bytes < size) {
        ssize_t n = read(cfd, obj + tot_bytes, size - tot_bytes);
        if (n <= 0)
            break;              // 0 = peer closed, <0 = error
        tot_bytes += (int)n;
    }
    if (tot_bytes < size) {
        std::cout << "read err" << std::endl;
        return -1;
    }
    return 0;
}
/**
 * Create a TCP socket and either accept one connection or connect out.
 *
 * Server mode (`server_name == NULL`): bind to INADDR_ANY:`port`, listen and
 * accept a single connection. `*local_fd` receives the listening socket and
 * `*conn_fd` the accepted one.
 *
 * Client mode: connect to the dotted-quad address `server_name` on `port`.
 * `*conn_fd` receives the connected socket; `*local_fd` (if non-NULL) is set
 * to -1 so callers that close it unconditionally do not close a stray fd.
 *
 * @return 0 on success; -1 after printing the name of the failing step, with
 *         the socket closed.
 */
int sock_connect(char *server_name, int port, int *local_fd, int *conn_fd)
{
    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    int sfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sfd < 0) {
        std::cout << "sfd:socket" << std::endl;
        return -1;
    }

    if (server_name == NULL) {
        if (bind(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
            close(sfd);
            std::cout << "bind" << std::endl;
            return -1;          // do not listen/accept on a closed socket
        }
        if (listen(sfd, 1) < 0) {
            close(sfd);
            std::cout << "listen" << std::endl;
            return -1;
        }
        int cfd = accept(sfd, NULL, NULL);
        if (cfd < 0) {
            close(sfd);
            std::cout << "accept" << std::endl;
            return -1;
        }
        *local_fd = sfd;
        *conn_fd = cfd;
        return 0;
    }

    // inet_aton() returns 0 for a malformed address; without this check the
    // connect below would silently target INADDR_ANY.
    if (inet_aton(server_name, &host_addr.sin_addr) == 0) {
        close(sfd);
        std::cout << "inet_aton" << std::endl;
        return -1;
    }
    if (connect(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        close(sfd);
        std::cout << "connect" << std::endl;
        return -1;
    }
    if (local_fd != NULL)
        *local_fd = -1;         // no listening socket in client mode
    *conn_fd = sfd;
    return 0;
}
// Fixed-size record exchanged over the sockets as raw bytes
// (main() reads sizeof(obj) bytes per request).
// NOTE(review): `key` is `int` here but `long long` in the other experiment
// programs' `dummy`; if this program is paired with one of those peers the
// record sizes differ — confirm which peer it is meant to talk to.
struct dummy {
    int  key;       // index into the validity table built by populate_objs()
    bool valid;     // accept/reject flag
    char val[100];  // payload bytes
};
/**
 * Initialise `arr[0 .. arr_size-1]`: key = index, and `valid` decided by one
 * rand() draw per element (a draw with rand()/RAND_MAX at or below
 * `err_fraction` marks the element invalid).
 */
void populate_objs(dummy arr[], int arr_size, double err_fraction)
{
    int idx = 0;
    while (idx < arr_size) {
        dummy &slot = arr[idx];
        slot.key = idx;
        slot.valid = !((double)rand() / RAND_MAX <= err_fraction);
        ++idx;
    }
}
// Stand-in for per-request work: counts a local variable up to 1000.
// NOTE(review): the result is unused, so an optimising compiler may remove
// the loop entirely — confirm the build flags if the delay matters.
void dummy_function()
{
    int counter = 0;
    int step = 0;
    while (step < 1000) {
        counter++;
        step++;
    }
}
double
err_fraction
=
0.0
;
int
arr_size
=
1000
;
int
main
()
{
dummy
arr
[
arr_size
];
populate_objs
(
arr
,
arr_size
,
err_fraction
);
int
client_sfd
,
client_cfd
;
sock_connect
(
NULL
,
SERVER_PORT
,
&
client_sfd
,
&
client_cfd
);
dummy
obj
;
int
obj_size
=
sizeof
(
obj
);
char
*
s
=
"S"
;
char
*
e
=
"E"
;
int
succ_cnt
=
0
;
int
err_cnt
=
0
;
for
(
int
i
=
0
;
i
<
arr_size
;
i
++
)
{
read_obj
(
client_cfd
,
(
char
*
)
&
obj
,
obj_size
);
if
(
arr
[
obj
.
key
].
valid
)
{
succ_cnt
++
;
dummy_function
();
send_obj
(
client_cfd
,
s
,
1
);
}
else
{
err_cnt
++
;
send_obj
(
client_cfd
,
e
,
1
);
}
}
cout
<<
"Closing connections"
<<
endl
;
//close(nic_cfd);
close
(
client_sfd
);
return
0
;
}
Smit_MTP_RamCloud_Replication_Offload/experiments/offloaded_reject.cpp
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <stdio.h>
#include <vector>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string.h>
using
namespace
std
;
// Endpoint addresses and ports used by the experiment programs.
// The addresses are writable arrays rather than `char *` pointers to string
// literals (a conversion ISO C++11 forbids); an array still decays to
// `char *` when passed to sock_connect().
char NIC_IP[] = "192.168.200.21";
char SERVER_IP[] = "192.168.200.20";
char CLIENT_IP[] = "192.168.200.40";
int SERVER_PORT = 8989;
int CLIENT_PORT = 9898;
/**
 * Write exactly `size` bytes from `obj` to descriptor `cfd`.
 *
 * write() may accept fewer bytes than requested, so the call is repeated
 * from the first unsent byte until everything has been handed to the kernel
 * instead of reporting a short write as a failure.
 *
 * @param cfd   descriptor to write to
 * @param obj   first byte to send
 * @param size  number of bytes to send
 * @return 0 once all bytes are written; -1 (after printing "write err") if
 *         write() fails or makes no progress
 */
int send_obj(int cfd, char *obj, int size)
{
    int sent = 0;
    while (sent < size) {
        ssize_t n = write(cfd, obj + sent, size - sent);
        if (n <= 0) {
            std::cout << "write err" << std::endl;
            return -1;
        }
        sent += (int)n;
    }
    return 0;
}
/**
 * Read exactly `size` bytes from descriptor `cfd` into `obj`.
 *
 * read() on a stream may return fewer bytes than requested. Each retry
 * therefore continues at the first unfilled byte and asks only for what is
 * still missing, so earlier data is never overwritten and the buffer is
 * never overrun.
 *
 * @param cfd   descriptor to read from
 * @param obj   destination buffer of at least `size` bytes
 * @param size  number of bytes to read
 * @return 0 when `size` bytes were read; -1 (after printing "read err") on
 *         end-of-file or a read() error before that
 */
int read_obj(int cfd, char *obj, int size)
{
    int tot_bytes = 0;
    while (tot_bytes < size) {
        ssize_t n = read(cfd, obj + tot_bytes, size - tot_bytes);
        if (n <= 0)
            break;              // 0 = peer closed, <0 = error
        tot_bytes += (int)n;
    }
    if (tot_bytes < size) {
        std::cout << "read err" << std::endl;
        return -1;
    }
    return 0;
}
/**
 * Create a TCP socket and either accept one connection or connect out.
 *
 * Server mode (`server_name == NULL`): bind to INADDR_ANY:`port`, listen and
 * accept a single connection. `*local_fd` receives the listening socket and
 * `*conn_fd` the accepted one.
 *
 * Client mode: connect to the dotted-quad address `server_name` on `port`.
 * `*conn_fd` receives the connected socket; `*local_fd` (if non-NULL) is set
 * to -1 so callers that close it unconditionally do not close a stray fd.
 *
 * @return 0 on success; -1 after printing the name of the failing step, with
 *         the socket closed.
 */
int sock_connect(char *server_name, int port, int *local_fd, int *conn_fd)
{
    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    int sfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sfd < 0) {
        std::cout << "sfd:socket" << std::endl;
        return -1;
    }

    if (server_name == NULL) {
        if (bind(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
            close(sfd);
            std::cout << "bind" << std::endl;
            return -1;          // do not listen/accept on a closed socket
        }
        if (listen(sfd, 1) < 0) {
            close(sfd);
            std::cout << "listen" << std::endl;
            return -1;
        }
        int cfd = accept(sfd, NULL, NULL);
        if (cfd < 0) {
            close(sfd);
            std::cout << "accept" << std::endl;
            return -1;
        }
        *local_fd = sfd;
        *conn_fd = cfd;
        return 0;
    }

    // inet_aton() returns 0 for a malformed address; without this check the
    // connect below would silently target INADDR_ANY.
    if (inet_aton(server_name, &host_addr.sin_addr) == 0) {
        close(sfd);
        std::cout << "inet_aton" << std::endl;
        return -1;
    }
    if (connect(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        close(sfd);
        std::cout << "connect" << std::endl;
        return -1;
    }
    if (local_fd != NULL)
        *local_fd = -1;         // no listening socket in client mode
    *conn_fd = sfd;
    return 0;
}
// Fixed-size record relayed between client and server as raw bytes
// (main() moves sizeof(t_obj) bytes per request).
struct dummy {
    long long key;      // request identifier
    bool      valid;    // accept/reject flag, overwritten by main()
    char      val[100]; // payload; main() sets val[0] = 'D' in its final message
};
/**
 * Initialise `arr[0 .. arr_size-1]`: key = index, and `valid` decided by one
 * rand() draw per element (a draw with rand()/RAND_MAX at or below
 * `err_fraction` marks the element invalid).
 */
void populate_objs(dummy arr[], int arr_size, double err_fraction)
{
    int idx = 0;
    while (idx < arr_size) {
        dummy &slot = arr[idx];
        slot.key = idx;
        slot.valid = !((double)rand() / RAND_MAX <= err_fraction);
        ++idx;
    }
}
double
err_fraction
=
0.6
;
long
long
arr_size
=
9000000
;
int
main
()
{
//dummy obj[arr_size];
srand
(
time
(
NULL
));
//populate_objs(obj, arr_size, err_fraction);
int
server_sfd
,
server_cfd
,
client_sfd
,
client_cfd
;
sock_connect
(
SERVER_IP
,
SERVER_PORT
,
&
server_sfd
,
&
server_cfd
);
cout
<<
"Connected to server"
<<
endl
;
sock_connect
(
NULL
,
CLIENT_PORT
,
&
client_sfd
,
&
client_cfd
);
cout
<<
"Connected to client"
<<
endl
;
double
s_time
,
e_time
,
avg_time
;
struct
timeval
temp_time
;
cout
<<
"Connected to server and client"
<<
endl
;
dummy
t_obj
;
int
succ_cnt
=
0
;
int
err_cnt
=
0
;
int
obj_size
=
sizeof
(
t_obj
);
char
*
temp_char
=
(
char
*
)
malloc
(
1
);
char
*
s
=
"S"
;
char
*
e
=
"E"
;
memset
((
void
*
)
&
t_obj
,
0
,
obj_size
);
for
(
long
long
i
=
0
;
i
<
arr_size
;
i
++
)
{
//cout<<i<<endl;
read_obj
(
client_cfd
,
(
char
*
)
&
t_obj
,
obj_size
);
if
((
double
)
rand
()
/
RAND_MAX
<=
err_fraction
)
t_obj
.
valid
=
false
;
else
t_obj
.
valid
=
true
;
if
(
t_obj
.
valid
)
{
succ_cnt
++
;
//cout<<t_obj.val<<endl;
//dummy_function();
send_obj
(
server_cfd
,
(
char
*
)
&
t_obj
,
obj_size
);
//cout<<"obj_sent"<<endl;
read_obj
(
server_cfd
,
temp_char
,
1
);
send_obj
(
client_cfd
,
s
,
1
);
}
else
{
err_cnt
++
;
send_obj
(
client_cfd
,
e
,
1
);
}
}
cout
<<
"Error Fraction: "
<<
err_fraction
<<
endl
;
cout
<<
"Succcessful: "
<<
succ_cnt
<<
endl
;
cout
<<
"Errored: "
<<
err_cnt
<<
endl
;
cout
<<
"....Closing connections...."
<<
endl
;
t_obj
.
val
[
0
]
=
'D'
;
send_obj
(
server_cfd
,
(
char
*
)
&
t_obj
,
obj_size
);
close
(
client_cfd
);
close
(
server_cfd
);
close
(
client_sfd
);
close
(
server_sfd
);
return
0
;
}
Smit_MTP_RamCloud_Replication_Offload/experiments/server.cpp
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <stdio.h>
#include <vector>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string.h>
using
namespace
std
;
// Endpoint addresses and ports used by the experiment programs.
// The addresses are writable arrays rather than `char *` pointers to string
// literals (a conversion ISO C++11 forbids); an array still decays to
// `char *` when passed to sock_connect().
char NIC_IP[] = "192.168.200.21";
char SERVER_IP[] = "192.168.200.20";
char CLIENT_IP[] = "192.168.200.40";
int SERVER_PORT = 8989;
int CLIENT_PORT = 9898;
/**
 * Write exactly `size` bytes from `obj` to descriptor `cfd`.
 *
 * write() may accept fewer bytes than requested, so the call is repeated
 * from the first unsent byte until everything has been handed to the kernel
 * instead of reporting a short write as a failure.
 *
 * @param cfd   descriptor to write to
 * @param obj   first byte to send
 * @param size  number of bytes to send
 * @return 0 once all bytes are written; -1 (after printing "write err") if
 *         write() fails or makes no progress
 */
int send_obj(int cfd, char *obj, int size)
{
    int sent = 0;
    while (sent < size) {
        ssize_t n = write(cfd, obj + sent, size - sent);
        if (n <= 0) {
            std::cout << "write err" << std::endl;
            return -1;
        }
        sent += (int)n;
    }
    return 0;
}
/**
 * Read exactly `size` bytes from descriptor `cfd` into `obj`.
 *
 * read() on a stream may return fewer bytes than requested. Each retry
 * therefore continues at the first unfilled byte and asks only for what is
 * still missing, so earlier data is never overwritten and the buffer is
 * never overrun.
 *
 * @param cfd   descriptor to read from
 * @param obj   destination buffer of at least `size` bytes
 * @param size  number of bytes to read
 * @return 0 when `size` bytes were read; -1 (after printing "read err") on
 *         end-of-file or a read() error before that
 */
int read_obj(int cfd, char *obj, int size)
{
    int tot_bytes = 0;
    while (tot_bytes < size) {
        ssize_t n = read(cfd, obj + tot_bytes, size - tot_bytes);
        if (n <= 0)
            break;              // 0 = peer closed, <0 = error
        tot_bytes += (int)n;
    }
    if (tot_bytes < size) {
        std::cout << "read err" << std::endl;
        return -1;
    }
    return 0;
}
/**
 * Create a TCP socket and either accept one connection or connect out.
 *
 * Server mode (`server_name == NULL`): bind to INADDR_ANY:`port`, listen and
 * accept a single connection. `*local_fd` receives the listening socket and
 * `*conn_fd` the accepted one.
 *
 * Client mode: connect to the dotted-quad address `server_name` on `port`.
 * `*conn_fd` receives the connected socket; `*local_fd` (if non-NULL) is set
 * to -1 so callers that close it unconditionally do not close a stray fd.
 *
 * @return 0 on success; -1 after printing the name of the failing step, with
 *         the socket closed.
 */
int sock_connect(char *server_name, int port, int *local_fd, int *conn_fd)
{
    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    int sfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sfd < 0) {
        std::cout << "sfd:socket" << std::endl;
        return -1;
    }

    if (server_name == NULL) {
        if (bind(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
            close(sfd);
            std::cout << "bind" << std::endl;
            return -1;          // do not listen/accept on a closed socket
        }
        if (listen(sfd, 1) < 0) {
            close(sfd);
            std::cout << "listen" << std::endl;
            return -1;
        }
        int cfd = accept(sfd, NULL, NULL);
        if (cfd < 0) {
            close(sfd);
            std::cout << "accept" << std::endl;
            return -1;
        }
        *local_fd = sfd;
        *conn_fd = cfd;
        return 0;
    }

    // inet_aton() returns 0 for a malformed address; without this check the
    // connect below would silently target INADDR_ANY.
    if (inet_aton(server_name, &host_addr.sin_addr) == 0) {
        close(sfd);
        std::cout << "inet_aton" << std::endl;
        return -1;
    }
    if (connect(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        close(sfd);
        std::cout << "connect" << std::endl;
        return -1;
    }
    if (local_fd != NULL)
        *local_fd = -1;         // no listening socket in client mode
    *conn_fd = sfd;
    return 0;
}
// Fixed-size record received over the socket as raw bytes
// (main() reads sizeof(obj) bytes per request).
struct dummy {
    long long key;      // request identifier
    bool      valid;    // accept/reject flag
    char      val[100]; // payload; val[0] == 'D' makes main() stop serving
};
/**
 * Initialise `arr[0 .. arr_size-1]`: key = index, and `valid` decided by one
 * rand() draw per element (a draw with rand()/RAND_MAX at or below
 * `err_fraction` marks the element invalid).
 */
void populate_objs(dummy arr[], long long arr_size, double err_fraction)
{
    long long idx = 0;
    while (idx < arr_size) {
        dummy &slot = arr[idx];
        slot.key = idx;
        slot.valid = !((double)rand() / RAND_MAX <= err_fraction);
        ++idx;
    }
}
// Stand-in for per-request work: counts a local variable up to 1000.
// NOTE(review): the result is unused, so an optimising compiler may remove
// the loop entirely — confirm the build flags if the delay matters.
void dummy_function()
{
    int counter = 0;
    int step = 0;
    while (step < 1000) {
        counter++;
        step++;
    }
}
double
err_fraction
=
0.0
;
long
long
arr_size
=
900000
;
/**
 * Back-end server: accepts one connection on SERVER_PORT and serves `dummy`
 * requests until one arrives with val[0] == 'D'. Each request triggers
 * dummy_function() and a one-byte 'S' reply. Afterwards the request count,
 * accumulated processing time and resulting throughput are printed.
 *
 * The reply byte is an explicit 'S' (it previously came from uninitialised
 * heap memory). The loop also ends when the peer disconnects or a send
 * fails, rather than spinning on a dead socket. Returns 1 if no connection
 * could be accepted.
 */
int main()
{
    int nic_sfd = -1, nic_cfd = -1;
    if (sock_connect(NULL, SERVER_PORT, &nic_sfd, &nic_cfd) < 0 || nic_cfd < 0)
        return 1;

    dummy obj;
    memset(&obj, 0, sizeof(obj));
    int obj_size = sizeof(obj);
    char s[] = "S";
    int req_cnt = 0;
    struct timeval t_time;
    double s1_time, e_time;
    double overall_time1 = 0.0;         // summed per-request time, ms

    while (true) {
        if (read_obj(nic_cfd, (char *)&obj, obj_size) < 0)
            break;
        if (obj.val[0] == 'D')
            break;

        gettimeofday(&t_time, NULL);
        s1_time = ((double)t_time.tv_sec * 1000.0) + ((double)t_time.tv_usec / 1000.0);

        req_cnt++;
        dummy_function();
        if (send_obj(nic_cfd, s, 1) < 0)
            break;

        gettimeofday(&t_time, NULL);
        e_time = ((double)t_time.tv_sec * 1000.0) + ((double)t_time.tv_usec / 1000.0);
        overall_time1 += (e_time - s1_time);
    }

    overall_time1 = overall_time1 / 1000.0;              // seconds

    std::cout << "Req count: " << req_cnt << std::endl;
    std::cout << "Processing time: " << overall_time1 << "s" << std::endl;
    std::cout << "Throughput: " << (req_cnt) / overall_time1 << std::endl;
    std::cout << "Closing connections" << std::endl;

    close(nic_cfd);
    close(nic_sfd);
    return 0;
}
Smit_MTP_RamCloud_Replication_Offload/final_shashank/backup.cpp
0 → 100644
View file @
d2d47b86
#include <unistd.h>
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <thread>
#include <vector>
#include <mutex>
#include <chrono>
#include <random>
#include <unordered_map>
#include <utility>
#include <inttypes.h>
// TCP port main() connects to.
#define PORT 9095
// Time and size multipliers. Every composite expansion is parenthesised so
// it behaves as a single value inside larger expressions (e.g. `x / M`).
#define MILLIS 1000
#define MICRO (MILLIS * 1000)
#define MAX_TARGET 4
#define K 1024
#define M (K * 1024)
#define G (M * 1024)
// in order to turn on debugging, compile with -DDEBUG flag
#ifdef DEBUG
#define D(x) (x)
#else
#define D(x) do{}while(0)
#endif
using
namespace
std
;
// NOTE(review): `m` is not referenced in the visible part of this file —
// confirm it is still needed.
unordered_map<string, uint64_t> m;
// This hashmap emulates the master's DRAM; main() pre-populates it before
// entering its select() loop.
unordered_map<string, uint64_t> master_data;
// Descriptor set that main() clears and refills on every pass of its
// select() loop.
fd_set readfds;
// unordered_map<string, uint64_t> m;
// Vector to store the difference in timestamps for each packet.
// Time since epoch in microseconds is typecast to uint64_t.
vector<uint64_t> latv;
/**
* This enum provides symbolic names for the status values returned
* to applications by RAMCloud operations.
*
* 0 means success; anything else means that an error occurred.
* Not all status values can be returned by all operations.
*/
typedef enum Status {
    /// Default return value when an operation was successful.
    STATUS_OK                           = 0,

    /// The server does not know about (and is not responsible for) a given
    /// tablet, but it may exist elsewhere in the system. When it is possible
    /// that the tablet exists on another server, this status should be
    /// returned in preference to the definitive TABLE_DOESNT_EXIST.
    STATUS_UNKNOWN_TABLET               = 1,

    /// A table does not exist anywhere in the system. At present only the
    /// coordinator can say with certainty that a table does not exist.
    STATUS_TABLE_DOESNT_EXIST           = 2,

    /// An object does not exist anywhere in the system. Unlike with tables
    /// there is no UNKNOWN_OBJECT status, because servers reject operations
    /// on objects in unknown tables with a table-related status. If they own
    /// a particular tablet, they can say with certainty whether an object
    /// exists there or not.
    STATUS_OBJECT_DOESNT_EXIST          = 3,

    STATUS_OBJECT_EXISTS                = 4,
    STATUS_WRONG_VERSION                = 5,
    STATUS_NO_TABLE_SPACE               = 6,
    STATUS_MESSAGE_TOO_SHORT            = 7,
    STATUS_UNIMPLEMENTED_REQUEST        = 8,
    STATUS_REQUEST_FORMAT_ERROR         = 9,
    STATUS_RESPONSE_FORMAT_ERROR        = 10,
    STATUS_COULDNT_CONNECT              = 11,
    STATUS_BACKUP_BAD_SEGMENT_ID        = 12,

    /// Returned by backups when they cannot (or do not wish to) allocate
    /// space for a segment replica.
    STATUS_BACKUP_OPEN_REJECTED         = 13,

    STATUS_BACKUP_SEGMENT_OVERFLOW      = 14,
    STATUS_BACKUP_MALFORMED_SEGMENT     = 15,
    STATUS_SEGMENT_RECOVERY_FAILED      = 16,

    /// A server is not prepared to handle a request at the present time; the
    /// caller should retry later. This can be returned in many situations,
    /// such as (a) the server is out of resources to execute the request, or
    /// (b) the server is not sure it actually has authority to execute the
    /// request and is checking with the coordinator.
    STATUS_RETRY                        = 17,

    /// The RPC requested an unknown service.
    STATUS_SERVICE_NOT_AVAILABLE        = 18,

    STATUS_TIMEOUT                      = 19,

    /// The server to which an RPC is directed either never existed, has come
    /// and gone, or is currently crashed. It is not in a position to respond
    /// to RPCs and probably never will be again (unless the id hasn't yet
    /// existed; once a server crashes its id is never reused).
    STATUS_SERVER_NOT_UP                = 20,

    STATUS_INTERNAL_ERROR               = 21,

    /// The object chosen for an operation does not match the associated
    /// requirements, so the chosen object is invalid.
    STATUS_INVALID_OBJECT               = 22,

    /// A tablet does not exist. Relevant when split or merge operations on
    /// tablets are executed.
    STATUS_TABLET_DOESNT_EXIST          = 23,

    /// The logic to partition tablets was invoked without a preceding
    /// invocation to start reading replicas off of disk.
    STATUS_PARTITION_BEFORE_READ        = 24,

    /// An RPC was intended for a particular server id but was actually sent
    /// to a different server id.
    STATUS_WRONG_SERVER                 = 25,

    /// The server sending an RPC is not present in the server list of the
    /// RPC recipient. Used to help servers discover that they are zombies
    /// (the rest of the cluster thinks a zombie is dead, but the zombie
    /// thinks it is still alive), so they don't continue servicing requests
    /// when other servers have already taken over their tablets. See
    /// "Zombies" in designNotes.
    STATUS_CALLER_NOT_IN_CLUSTER        = 26,

    /// A single request was too big to fit in an rpc and thus could not be
    /// sent/carried out.
    STATUS_REQUEST_TOO_LARGE            = 27,

    /// The server does not know about (and is not responsible for) a given
    /// indexlet, but it may exist elsewhere in the system. When it is
    /// possible that the indexlet exists on another server, this status
    /// should be returned in preference to the definitive INDEX_DOESNT_EXIST.
    STATUS_UNKNOWN_INDEXLET             = 28,

    /// An index does not exist anywhere in the system. At present only the
    /// coordinator can say with certainty that an index does not exist.
    STATUS_INDEX_DOESNT_EXIST           = 29,

    /// A parameter provided by the client is invalid (for example, it is
    /// outside allowed bounds).
    STATUS_INVALID_PARAMETER            = 30,

    /// The client already received the result of the rpc, so it does not
    /// make sense to execute it again. Most likely cause is a delayed
    /// network packet.
    STATUS_STALE_RPC                    = 31,

    /// The lease of a client is expired on the coordinator; the master
    /// refused to execute the RPC with the expired lease.
    STATUS_EXPIRED_LEASE                = 32,

    /// A client tried to perform transaction operations after the
    /// transaction commit had already started.
    STATUS_TX_OP_AFTER_COMMIT           = 33,

    STATUS_MAX_VALUE                    = 33,

    // Note: if you add a new status value you must make the following
    // additional updates:
    // * Modify STATUS_MAX_VALUE to have a value equal to the largest
    //   defined status value, and make sure its definition is the last one
    //   in the list. STATUS_MAX_VALUE is used primarily for testing.
    // * Add new entries in the tables "messages" and "symbols" in Status.cc.
    // * Add a new exception class to ClientException.h
    // * Add a new "case" to ClientException::throwException to map from
    //   the status value to a status-specific ClientException subclass.
    // * In the Java bindings, add a static class for the exception to
    //   ClientException.java
    // * Add a case for the status of the exception to throw the exception in
    //   ClientException.java
    // * Add the exception to the Status enum in Status.java, making
    //   sure the status is in the correct position corresponding to its
    //   status code.
} Status;
/**
* Used in conditional operations to specify conditions under
* which an operation should be aborted with an error.
*
* RejectRules are typically used to ensure consistency of updates;
* for example, we might want to update a value but only if it hasn't
* changed since the last time we read it. If a RejectRules object
* is passed to an operation, the operation will be aborted if any
* of the following conditions are satisfied:
* - doesntExist is nonzero and the object does not exist
* - exists is nonzero and the object does exist
* - versionLeGiven is nonzero and the object exists with a version
* less than or equal to givenVersion.
* - versionNeGiven is nonzero and the object exists with a version
* different from givenVersion.
*/
struct RejectRules {
    uint64_t givenVersion;      // version the versionLe/versionNe rules compare against
    uint8_t  doesntExist;       // nonzero: abort if the object does not exist
    uint8_t  exists;            // nonzero: abort if the object does exist
    uint8_t  versionLeGiven;    // nonzero: abort if version <= givenVersion
    uint8_t  versionNeGiven;    // nonzero: abort if version != givenVersion
} __attribute__((packed));      // no padding: 12 bytes
/**
* Selects the particular service that will handle a given rpc.
* A rpc may only be sent to one particular service; see ServiceMask for
* situations dealing with sets of services on a particular Server.
*/
enum ServiceType {
    MASTER_SERVICE,         // 0
    BACKUP_SERVICE,         // 1
    COORDINATOR_SERVICE,    // 2
    ADMIN_SERVICE,          // 3
    INVALID_SERVICE,        // One higher than the max.
};
/**
* Used in linearizable RPCs to check whether or not the RPC can be processed.
*/
struct ClientLease {
    /// A cluster unique id for a specific lease.
    /// 0 is used to indicate an invalid or expired id.
    uint64_t leaseId;
    /// Cluster time after which the lease may have become invalid.
    uint64_t leaseExpiration;
    /// Cluster time when this lease information was provided by the
    /// coordinator.
    uint64_t timestamp;
} __attribute__((packed));      // no padding: 24 bytes
/**
* This enum defines the choices for the "opcode" field in RPC
* headers, which selects a particular operation to perform. Each
* RAMCloud service implements a subset of these operations. If you
* change this table you must also reflect the changes in the following
* locations:
* - The method opcodeSymbol in WireFormat.cc.
* - WireFormatTest.cc's out-of-range test, if ILLEGAL_RPC_TYPE was changed.
* - You may need to modify the "callees" table in scripts/genLevels.py,
* which keeps track of which RPCs invoke which other RPCs.
*/
enum Opcode {
    PING                             = 7,
    PROXY_PING                       = 8,
    KILL                             = 9,
    CREATE_TABLE                     = 10,
    GET_TABLE_ID                     = 11,
    DROP_TABLE                       = 12,
    READ                             = 13,
    WRITE                            = 14,
    REMOVE                           = 15,
    ENLIST_SERVER                    = 16,
    GET_SERVER_LIST                  = 17,
    GET_TABLE_CONFIG                 = 18,
    RECOVER                          = 19,
    HINT_SERVER_CRASHED              = 20,
    RECOVERY_MASTER_FINISHED         = 21,
    ENUMERATE                        = 22,
    SET_MASTER_RECOVERY_INFO         = 23,
    FILL_WITH_TEST_DATA              = 24,
    MULTI_OP                         = 25,
    GET_METRICS                      = 26,
    // 27 is not assigned
    BACKUP_FREE                      = 28,
    BACKUP_GETRECOVERYDATA           = 29,
    // 30 is not assigned
    BACKUP_STARTREADINGDATA          = 31,
    BACKUP_WRITE                     = 32,
    BACKUP_RECOVERYCOMPLETE          = 33,
    // 34 is not assigned
    UPDATE_SERVER_LIST               = 35,
    BACKUP_STARTPARTITION            = 36,
    // 37, 38 are not assigned
    DROP_TABLET_OWNERSHIP            = 39,
    TAKE_TABLET_OWNERSHIP            = 40,
    // 41 is not assigned
    GET_HEAD_OF_LOG                  = 42,
    INCREMENT                        = 43,
    PREP_FOR_MIGRATION               = 44,
    RECEIVE_MIGRATION_DATA           = 45,
    REASSIGN_TABLET_OWNERSHIP        = 46,
    MIGRATE_TABLET                   = 47,
    IS_REPLICA_NEEDED                = 48,
    SPLIT_TABLET                     = 49,
    GET_SERVER_STATISTICS            = 50,
    SET_RUNTIME_OPTION               = 51,
    GET_SERVER_CONFIG                = 52,
    GET_BACKUP_CONFIG                = 53,
    // 54 is not assigned
    GET_MASTER_CONFIG                = 55,
    GET_LOG_METRICS                  = 56,
    VERIFY_MEMBERSHIP                = 57,
    GET_RUNTIME_OPTION               = 58,
    GET_LEASE_INFO                   = 59,
    RENEW_LEASE                      = 60,
    SERVER_CONTROL                   = 61,
    SERVER_CONTROL_ALL               = 62,
    GET_SERVER_ID                    = 63,
    READ_KEYS_AND_VALUE              = 64,
    LOOKUP_INDEX_KEYS                = 65,
    READ_HASHES                      = 66,
    INSERT_INDEX_ENTRY               = 67,
    REMOVE_INDEX_ENTRY               = 68,
    CREATE_INDEX                     = 69,
    DROP_INDEX                       = 70,
    DROP_INDEXLET_OWNERSHIP          = 71,
    TAKE_INDEXLET_OWNERSHIP          = 72,
    PREP_FOR_INDEXLET_MIGRATION      = 73,
    SPLIT_AND_MIGRATE_INDEXLET       = 74,
    COORD_SPLIT_AND_MIGRATE_INDEXLET = 75,
    TX_DECISION                      = 76,
    TX_PREPARE                       = 77,
    TX_REQUEST_ABORT                 = 78,
    TX_HINT_FAILED                   = 79,
    ECHO                             = 80,
    ILLEGAL_RPC_TYPE                 = 81,  // 1 + the highest legitimate Opcode
};
/**
* Each RPC request starts with this structure.
*/
struct RequestCommon {
    uint16_t opcode;    /// Opcode of operation to be performed.
    uint16_t service;   /// ServiceType to invoke for this rpc.
} __attribute__((packed));      // no padding: 4 bytes
/**
* Each RPC response starts with this structure.
*/
struct ResponseCommon {
    // Indicates whether the operation succeeded; if not, it explains why.
    // NOTE(review): the field is a plain enum, so its width is whatever the
    // compiler picks for `Status` — confirm both ends of the wire agree.
    Status status;
} __attribute__((packed));
/**
* This struct describes the packet structure of a write packet
* key differences include the addition of the field uint64_t timestamp;
* to help measure latencies and the ifdef elsedef structure to allow the user
* to set the packet size using commandline macros at compile time
* default size is 100 bytes
*/
// Wire layout of a WRITE rpc handled by the master service.
//
// The padding arrays select the packet size at compile time: with no macro
// defined Request is 100 bytes, -DP1000 makes it 1000 bytes and -DP10000
// makes it 10000 bytes. P10000 takes precedence if both are defined, so the
// padding members are never declared twice.
struct Write {
    static const Opcode opcode = WRITE;
    static const ServiceType service = MASTER_SERVICE;

    struct Request {
        RequestCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t timestamp;         // added to help measure latencies
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        uint32_t length;            // Includes the total size of the
                                    // keysAndValue blob in bytes. These
                                    // follow immediately after this header
        RejectRules rejectRules;
        uint8_t async;
        uint8_t array[15];          // padding
#if defined(P10000)
        uint64_t array2[112];
        uint8_t array3[4];
        uint64_t array4[1125];
#elif defined(P1000)
        uint64_t array2[112];
        uint8_t array3[4];
#endif
    } __attribute__((packed));

    struct Response {
        ResponseCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t version;
        uint64_t timestamp;
        uint64_t array[8];          // padding
#if defined(P10000)
        uint64_t array2[112];
        uint8_t array3[4];
        uint64_t array4[1125];
#elif defined(P1000)
        uint64_t array2[112];
        uint8_t array3[4];
#endif
    } __attribute__((packed));
};
// Wire layout of a BACKUP_WRITE rpc handled by the backup service; the
// fields mirror those of Write.
//
// The padding arrays select the packet size at compile time: with no macro
// defined Request is 100 bytes, -DP1000 makes it 1000 bytes and -DP10000
// makes it 10000 bytes. P10000 takes precedence if both are defined, so the
// padding members are never declared twice.
struct Backup_Write {
    static const Opcode opcode = BACKUP_WRITE;
    static const ServiceType service = BACKUP_SERVICE;

    struct Request {
        RequestCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t timestamp;
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        uint32_t length;            // Includes the total size of the
                                    // keysAndValue blob in bytes. These
                                    // follow immediately after this header
        RejectRules rejectRules;
        uint8_t async;
        uint8_t array[15];          // padding
#if defined(P10000)
        uint64_t array2[112];
        uint8_t array3[4];
        uint64_t array4[1125];
#elif defined(P1000)
        uint64_t array2[112];
        uint8_t array3[4];
#endif
    } __attribute__((packed));

    struct Response {
        ResponseCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t version;
        uint64_t timestamp;
        uint64_t array[8];          // padding
#if defined(P10000)
        uint64_t array2[112];
        uint8_t array3[4];
        uint64_t array4[1125];
#elif defined(P1000)
        uint64_t array2[112];
        uint8_t array3[4];
#endif
    } __attribute__((packed));
};
int
main
(
int
argc
,
char
const
*
argv
[])
{
int
sock
=
0
,
valread
;
struct
sockaddr_in
serv_addr
;
char
*
hello
=
"Master preprocessing done"
;
char
buffer1
[
1024
]
=
{
0
};
struct
sockaddr_in
address
;
int
opt
=
1
;
int
addrlen
=
sizeof
(
address
);
if
((
sock
=
socket
(
AF_INET
,
SOCK_STREAM
,
0
))
<
0
)
{
printf
(
"
\n
Socket creation error
\n
"
);
return
-
1
;
}
serv_addr
.
sin_family
=
AF_INET
;
serv_addr
.
sin_port
=
htons
(
PORT
);
// Convert IPv4 and IPv6 addresses from text to binary form
if
(
inet_pton
(
AF_INET
,
"10.129.2.181"
,
&
serv_addr
.
sin_addr
)
<=
0
)
//192.168.200.21//10.129.2.181
{
printf
(
"
\n
Invalid address/ Address not supported
\n
"
);
return
-
1
;
}
if
(
connect
(
sock
,
(
struct
sockaddr
*
)
&
serv_addr
,
sizeof
(
serv_addr
))
<
0
)
{
printf
(
"
\n
Connection Failed
\n
"
);
return
-
1
;
}
int
sd
,
max_sd
;
int
csd
,
msd
;
int
max_clients
=
1
;
int
activity
;
int
client_socket
=
0
;
// valread = read( sock , buffer1, 1024);
// if(valread>0)
// {
// printf("%s\n",buffer1 );
// memset(&buffer1[0], 0, sizeof(buffer1));
// }
//setting up master's DRAM before responding to the synchronisation packet
for
(
int
i
=
0
;
i
<
2000000
;
i
++
)
{
master_data
.
insert
({
"1$"
+
to_string
(
i
),
1
});
}
// if( send(sock, hello, strlen(hello), 0) != strlen(hello) )
// {
// perror("send");
// }
// valread = read( sock , buffer1, 1024);
// if(valread>0)
// {
// printf("%s\n",buffer1 );
// memset(&buffer1[0], 0, sizeof(buffer1));
// }
while
(
1
)
{
//clear the socket set
FD_ZERO
(
&
readfds
);
//add master socket to set
FD_SET
(
sock
,
&
readfds
);
max_sd
=
sock
;
//socket descriptor
sd
=
client_socket
;
//if valid socket descriptor then add to read list
if
(
sd
>
0
)
FD_SET
(
sd
,
&
readfds
);
//highest file descriptor number, need it for the select function
if
(
sd
>
max_sd
)
max_sd
=
sd
;
//wait for an activity on one of the sockets , timeout is NULL ,
//so wait indefinitely
activity
=
select
(
max_sd
+
1
,
&
readfds
,
NULL
,
NULL
,
NULL
);
if
((
activity
<
0
)
&&
(
errno
!=
EINTR
))
{
printf
(
"select error"
);
}
//If something happened on the master socket ,
if
(
FD_ISSET
(
sock
,
&
readfds
))
{
int
n
=
0
;
static
char
buffer
[
2
*
M
]
=
{
0
};
n
=
read
(
sock
,
buffer
,
2
*
M
);
if
(
n
>
0
)
{
// The write request received is stored in this struct
struct
Write
::
Request
w1
;
memcpy
(
&
w1
,
buffer
,
sizeof
(
w1
));
int
a
=
w1
.
common
.
opcode
;
D
(
printf
(
"Opcode:%d
\n
"
,
a
));
//comparing the opcode to ensure that only write packets are processed by the offload
//and all other packets pass through
if
(
a
==
WRITE
)
{
struct
Write
::
Request
w
;
memcpy
(
&
w
,
buffer
,
sizeof
(
w
));
//debugging
D
(
printf
(
"tableId:%lu
\n
"
,
w
.
tableId
));
D
(
printf
(
"key:%lu
\n
"
,
w
.
key
));
string
s
=
""
;
//Create the key for the hashmap by concatenating
//the tableId and the key in the write packet
s
=
s
+
to_string
(
w
.
tableId
)
+
"$"
+
to_string
(
w
.
key
);
D
(
printf
(
"HashKey:%s
\n
"
,
s
.
c_str
()));
//Master checks the reject rules to respond with failure
//if operation is atomic and there is a version number mismatch
if
(
w
.
rejectRules
.
versionNeGiven
)
{
string
s
=
""
;
//Create the key for the hashmap by concatenating
//the tableId and the key in the write packet
s
=
s
+
to_string
(
w
.
tableId
)
+
"$"
+
to_string
(
w
.
key
);
D
(
cout
<<
s
<<
"
\n
"
);
if
(
master_data
.
find
(
s
)
!=
master_data
.
end
())
{
D
(
std
::
cout
<<
"Key found
\n
"
);
uint64_t
curr_version_number
=
master_data
[
s
];
//compare curr_version_number with version number in w
if
(
w
.
rejectRules
.
givenVersion
!=
curr_version_number
)
{
D
(
std
::
cout
<<
"version number doesn't match
\n
"
);
//raise failure response
struct
Write
::
Response
wr
;
wr
.
common
.
status
=
STATUS_WRONG_VERSION
;
wr
.
tableId
=
w
.
tableId
;
wr
.
key
=
w
.
key
;
wr
.
version
=
curr_version_number
;
wr
.
timestamp
=
w
.
timestamp
;
send
(
sock
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
D
(
printf
(
"%s
\n
"
,
" master raised failure response"
));
}
else
{
D
(
std
::
cout
<<
"version number matches
\n
"
);
//update version number in master
master_data
[
s
]
=
master_data
[
s
]
+
(
uint64_t
)
1
;
D
(
printf
(
"Updated master_data Key:%s Version:%lu
\n
"
,
s
.
c_str
(),
master_data
[
s
]));
//raise success response
struct
Write
::
Response
wr
;
wr
.
common
.
status
=
STATUS_OK
;
wr
.
tableId
=
w
.
tableId
;
wr
.
key
=
w
.
key
;
wr
.
version
=
master_data
[
s
];
wr
.
timestamp
=
w
.
timestamp
;
//storing paramters for debugging purposes
int
stat
=
wr
.
common
.
status
;
int
table
=
wr
.
tableId
;
int
key
=
wr
.
key
;
int
vers
=
wr
.
version
;
uint64_t
ts
=
wr
.
timestamp
;
D
(
printf
(
"Master sent response Table:%d Key:%d Version:%d Status:%d Timestamp:%"
PRIu64
"
\n
"
,
table
,
key
,
vers
,
stat
,
ts
));
chrono
::
microseconds
ms
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
chrono
::
system_clock
::
now
().
time_since_epoch
());
uint64_t
ts2
=
ms
.
count
();
D
(
printf
(
"Master sent response at Timestamp:%"
PRIu64
"
\n
"
,
ts2
));
// printf("Master sent hashmap Key:%s Version:%lu\n",s.c_str(), vers);
send
(
sock
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
D
(
printf
(
"%s
\n
"
,
"master raised success response"
));
}
}
else
{
D
(
std
::
cout
<<
"Key not found in master data
\n
"
);
struct
Write
::
Response
wr
;
//raise failure response since object does not exist in master's DRAM
wr
.
common
.
status
=
STATUS_OBJECT_DOESNT_EXIST
;
wr
.
version
=
1
;
wr
.
tableId
=
w
.
tableId
;
wr
.
key
=
w
.
key
;
wr
.
timestamp
=
w
.
timestamp
;
send
(
sock
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
D
(
printf
(
"%s
\n
"
,
"object doesn't exist in master, sent auto failure"
));
}
}
else
{
//raise success response
struct
Write
::
Response
wr
;
wr
.
common
.
status
=
STATUS_OK
;
wr
.
version
=
1
;
wr
.
tableId
=
w
.
tableId
;
wr
.
key
=
w
.
key
;
wr
.
timestamp
=
w
.
timestamp
;
send
(
sock
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
D
(
printf
(
"%s
\n
"
,
"non transaction packet, sent auto success
\n
"
));
}
}
//termination packet for debugging
else
if
(
a
==
ILLEGAL_RPC_TYPE
)
{
struct
Write
::
Response
wr
;
wr
.
common
.
status
=
STATUS_MAX_VALUE
;
wr
.
version
=
1
;
wr
.
tableId
=
w1
.
tableId
;
wr
.
key
=
w1
.
key
;
wr
.
timestamp
=
w1
.
timestamp
;
send
(
sock
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
chrono
::
microseconds
ms
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
chrono
::
system_clock
::
now
().
time_since_epoch
());
uint64_t
ts2
=
ms
.
count
();
D
(
printf
(
"Master sent response at Timestamp:%"
PRIu64
"
\n
"
,
ts2
));
D
(
printf
(
"%s
\n
"
,
"testing
\n
"
));
break
;
}
// else error
else
{
struct
Write
::
Response
wr
;
wr
.
common
.
status
=
STATUS_MAX_VALUE
;
wr
.
version
=
1
;
wr
.
tableId
=
w1
.
tableId
;
wr
.
key
=
w1
.
key
;
wr
.
timestamp
=
w1
.
timestamp
;
send
(
sock
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
chrono
::
microseconds
ms
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
chrono
::
system_clock
::
now
().
time_since_epoch
());
uint64_t
ts2
=
ms
.
count
();
D
(
printf
(
"Master sent response at Timestamp:%"
PRIu64
"
\n
"
,
ts2
));
D
(
printf
(
"%s
\n
"
,
"testing
\n
"
));
break
;
}
}
}
}
return
0
;
}
Smit_MTP_RamCloud_Replication_Offload/final_shashank/client.cpp
0 → 100644
View file @
d2d47b86
#include <arpa/inet.h>
#include <errno.h>
#include <inttypes.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <chrono>
#include <fstream>
#include <iostream>
#include <mutex>
#include <numeric>
#include <random>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>
// TCP port the client connects to.
#define PORT 9090

// Unit helpers. Every expansion that contains an operator is parenthesised so
// the macro behaves as a single value inside a larger expression
// (e.g. `x / M` divides by 1048576 rather than expanding to `x / K * 1024`).
#define MILLIS 1000
#define MICRO (MILLIS * 1000)
#define MAX_TARGET 4
#define K 1024
#define M (K * 1024)
#define G (M * 1024)

// in order to turn on debugging, compile with -DDEBUG flag
// D(x) evaluates x in debug builds and expands to an empty statement otherwise,
// so the arguments of a disabled trace are never evaluated.
#ifdef DEBUG
#define D(x) (x)
#else
#define D(x) do{}while(0)
#endif
using
namespace
std
;
// unordered_map<string, uint64_t> m;
// Per-packet latency samples: for every response counted as a success or a
// failure, the difference between the time of receipt and the timestamp
// carried by the packet. Both are microseconds since the epoch held in a
// uint64_t.
std::vector<uint64_t> latv;
/**
 * Symbolic names for the status values RAMCloud operations hand back to
 * applications.
 *
 * Zero is success, any other value reports an error. A given operation only
 * ever returns a subset of these values.
 */
typedef enum Status {
    /// The operation succeeded.
    STATUS_OK                       = 0,

    /// The server neither knows nor owns the tablet, but it may live on
    /// another server. Preferred over the definitive TABLE_DOESNT_EXIST
    /// whenever the tablet could exist elsewhere.
    STATUS_UNKNOWN_TABLET           = 1,

    /// The table exists nowhere in the system. At present only the
    /// coordinator can state this with certainty.
    STATUS_TABLE_DOESNT_EXIST       = 2,

    /// The object exists nowhere in the system. There is no UNKNOWN_OBJECT
    /// counterpart: operations on objects of unknown tables are rejected with
    /// a table-related status, and a server that owns the tablet knows for
    /// sure whether the object is there.
    STATUS_OBJECT_DOESNT_EXIST      = 3,

    STATUS_OBJECT_EXISTS            = 4,
    STATUS_WRONG_VERSION            = 5,
    STATUS_NO_TABLE_SPACE           = 6,
    STATUS_MESSAGE_TOO_SHORT        = 7,
    STATUS_UNIMPLEMENTED_REQUEST    = 8,
    STATUS_REQUEST_FORMAT_ERROR     = 9,
    STATUS_RESPONSE_FORMAT_ERROR    = 10,
    STATUS_COULDNT_CONNECT          = 11,
    STATUS_BACKUP_BAD_SEGMENT_ID    = 12,

    /// A backup cannot (or does not wish to) allocate space for a segment
    /// replica.
    STATUS_BACKUP_OPEN_REJECTED     = 13,

    STATUS_BACKUP_SEGMENT_OVERFLOW  = 14,
    STATUS_BACKUP_MALFORMED_SEGMENT = 15,
    STATUS_SEGMENT_RECOVERY_FAILED  = 16,

    /// The server cannot handle the request right now; the caller should try
    /// again later. Returned, for example, when (a) the server has run out of
    /// resources for the request or (b) it is unsure of its authority to
    /// execute it and is checking with the coordinator.
    STATUS_RETRY                    = 17,

    /// The RPC asked for an unknown service.
    STATUS_SERVICE_NOT_AVAILABLE    = 18,

    STATUS_TIMEOUT                  = 19,

    /// The target server never existed, has come and gone, or is currently
    /// crashed. It cannot answer RPCs and probably never will again (unless
    /// the id has not existed yet; ids of crashed servers are never reused).
    STATUS_SERVER_NOT_UP            = 20,

    STATUS_INTERNAL_ERROR           = 21,

    /// The object chosen for the operation does not meet the associated
    /// requirements and is therefore invalid.
    STATUS_INVALID_OBJECT           = 22,

    /// A tablet does not exist; relevant to tablet split and merge
    /// operations.
    STATUS_TABLET_DOESNT_EXIST      = 23,

    /// Tablet partitioning was invoked without a preceding request to start
    /// reading replicas from disk.
    STATUS_PARTITION_BEFORE_READ    = 24,

    /// The RPC was meant for one server id but reached a different one.
    STATUS_WRONG_SERVER             = 25,

    /// The sender of the RPC is missing from the recipient's server list.
    /// This lets a "zombie" (a server the cluster considers dead while it
    /// believes itself alive) discover its state and stop serving requests
    /// for tablets that other servers have taken over. See "Zombies" in
    /// designNotes.
    STATUS_CALLER_NOT_IN_CLUSTER    = 26,

    /// A single request was too big to fit in an rpc and could not be
    /// sent/carried out.
    STATUS_REQUEST_TOO_LARGE        = 27,

    /// The server neither knows nor owns the indexlet, but it may live on
    /// another server. Preferred over the definitive INDEX_DOESNT_EXIST
    /// whenever the indexlet could exist elsewhere.
    STATUS_UNKNOWN_INDEXLET         = 28,

    /// The index exists nowhere in the system. At present only the
    /// coordinator can state this with certainty.
    STATUS_INDEX_DOESNT_EXIST       = 29,

    /// A parameter supplied by the client is invalid (for example, outside
    /// the allowed bounds).
    STATUS_INVALID_PARAMETER        = 30,

    /// The client already received the result of this rpc, so executing it
    /// again makes no sense. The most likely cause is a delayed network
    /// packet.
    STATUS_STALE_RPC                = 31,

    /// The client's lease has expired on the coordinator; the master refused
    /// to execute the RPC.
    STATUS_EXPIRED_LEASE            = 32,

    /// The client attempted a transaction operation after the commit of that
    /// transaction had already started.
    STATUS_TX_OP_AFTER_COMMIT       = 33,

    /// Alias of the largest defined status value; used primarily for testing.
    STATUS_MAX_VALUE                = STATUS_TX_OP_AFTER_COMMIT,

    // Checklist when adding a new status value:
    //  * Update STATUS_MAX_VALUE so it equals the largest defined status
    //    value, and keep its definition last in the list.
    //  * Add new entries in the tables "messages" and "symbols" in Status.cc.
    //  * Add a new exception class to ClientException.h
    //  * Add a new "case" to ClientException::throwException to map from
    //    the status value to a status-specific ClientException subclass.
    //  * In the Java bindings, add a static class for the exception to
    //    ClientException.java
    //  * Add a case for the status of the exception to throw the exception in
    //    ClientException.java
    //  * Add the exception to the Status enum in Status.java, making sure the
    //    status sits at the position that corresponds to its status code.
} Status;
/**
 * Conditions under which a conditional operation must be aborted with an
 * error.
 *
 * RejectRules are typically used to keep updates consistent, e.g. "write this
 * value only if the object has not changed since I last read it". When a
 * RejectRules object accompanies an operation, the operation is aborted if
 * any of these hold:
 *  - doesntExist is nonzero and the object does not exist
 *  - exists is nonzero and the object does exist
 *  - versionLeGiven is nonzero and the object exists with a version
 *    less than or equal to givenVersion
 *  - versionNeGiven is nonzero and the object exists with a version
 *    different from givenVersion
 *
 * Packed: the members are laid out back to back with no compiler padding.
 */
struct __attribute__((packed)) RejectRules {
    uint64_t givenVersion;      // reference version for the two version rules
    uint8_t  doesntExist;       // flag: reject when the object is missing
    uint8_t  exists;            // flag: reject when the object is present
    uint8_t  versionLeGiven;    // flag: reject when version <= givenVersion
    uint8_t  versionNeGiven;    // flag: reject when version != givenVersion
};
/**
 * Identifies the service that handles a given rpc.
 * An rpc is addressed to exactly one service; see ServiceMask for situations
 * that deal with sets of services on a particular Server.
 */
enum ServiceType {
    MASTER_SERVICE      = 0,
    BACKUP_SERVICE      = 1,
    COORDINATOR_SERVICE = 2,
    ADMIN_SERVICE       = 3,
    INVALID_SERVICE     = 4,    // One higher than the max.
};
/**
 * Lease information carried by linearizable RPCs; used to check whether or
 * not the RPC can be processed.
 *
 * Packed: three consecutive 64-bit fields with no compiler padding.
 */
struct __attribute__((packed)) ClientLease {
    /// A cluster unique id for a specific lease.
    /// 0 is used to indicate invalid or expired id.
    uint64_t leaseId;

    /// Cluster time after which the lease may have become invalid.
    uint64_t leaseExpiration;

    /// Cluster time when this lease information was provided by the
    /// coordinator.
    uint64_t timestamp;
};
/**
 * Values of the "opcode" field in RPC headers; each one selects a particular
 * operation. Every RAMCloud service implements a subset of them.
 *
 * The numbering is explicit and not contiguous: 27, 30, 34, 37, 38, 41 and 54
 * are not assigned.
 *
 * When this table changes, the following places must change with it:
 *  - The method opcodeSymbol in WireFormat.cc.
 *  - WireFormatTest.cc's out-of-range test, if ILLEGAL_RPC_TYPE was changed.
 *  - Possibly the "callees" table in scripts/genLevels.py, which keeps track
 *    of which RPCs invoke which other RPCs.
 */
enum Opcode {
    PING                             = 7,
    PROXY_PING                       = 8,
    KILL                             = 9,
    CREATE_TABLE                     = 10,
    GET_TABLE_ID                     = 11,
    DROP_TABLE                       = 12,
    READ                             = 13,
    WRITE                            = 14,
    REMOVE                           = 15,
    ENLIST_SERVER                    = 16,
    GET_SERVER_LIST                  = 17,
    GET_TABLE_CONFIG                 = 18,
    RECOVER                          = 19,
    HINT_SERVER_CRASHED              = 20,
    RECOVERY_MASTER_FINISHED         = 21,
    ENUMERATE                        = 22,
    SET_MASTER_RECOVERY_INFO         = 23,
    FILL_WITH_TEST_DATA              = 24,
    MULTI_OP                         = 25,
    GET_METRICS                      = 26,
    BACKUP_FREE                      = 28,
    BACKUP_GETRECOVERYDATA           = 29,
    BACKUP_STARTREADINGDATA          = 31,
    BACKUP_WRITE                     = 32,
    BACKUP_RECOVERYCOMPLETE          = 33,
    UPDATE_SERVER_LIST               = 35,
    BACKUP_STARTPARTITION            = 36,
    DROP_TABLET_OWNERSHIP            = 39,
    TAKE_TABLET_OWNERSHIP            = 40,
    GET_HEAD_OF_LOG                  = 42,
    INCREMENT                        = 43,
    PREP_FOR_MIGRATION               = 44,
    RECEIVE_MIGRATION_DATA           = 45,
    REASSIGN_TABLET_OWNERSHIP        = 46,
    MIGRATE_TABLET                   = 47,
    IS_REPLICA_NEEDED                = 48,
    SPLIT_TABLET                     = 49,
    GET_SERVER_STATISTICS            = 50,
    SET_RUNTIME_OPTION               = 51,
    GET_SERVER_CONFIG                = 52,
    GET_BACKUP_CONFIG                = 53,
    GET_MASTER_CONFIG                = 55,
    GET_LOG_METRICS                  = 56,
    VERIFY_MEMBERSHIP                = 57,
    GET_RUNTIME_OPTION               = 58,
    GET_LEASE_INFO                   = 59,
    RENEW_LEASE                      = 60,
    SERVER_CONTROL                   = 61,
    SERVER_CONTROL_ALL               = 62,
    GET_SERVER_ID                    = 63,
    READ_KEYS_AND_VALUE              = 64,
    LOOKUP_INDEX_KEYS                = 65,
    READ_HASHES                      = 66,
    INSERT_INDEX_ENTRY               = 67,
    REMOVE_INDEX_ENTRY               = 68,
    CREATE_INDEX                     = 69,
    DROP_INDEX                       = 70,
    DROP_INDEXLET_OWNERSHIP          = 71,
    TAKE_INDEXLET_OWNERSHIP          = 72,
    PREP_FOR_INDEXLET_MIGRATION      = 73,
    SPLIT_AND_MIGRATE_INDEXLET       = 74,
    COORD_SPLIT_AND_MIGRATE_INDEXLET = 75,
    TX_DECISION                      = 76,
    TX_PREPARE                       = 77,
    TX_REQUEST_ABORT                 = 78,
    TX_HINT_FAILED                   = 79,
    ECHO                             = 80,
    ILLEGAL_RPC_TYPE                 = 81,  // 1 + the highest legitimate Opcode
};
/**
 * Header found at the start of every RPC request.
 * Packed: two 16-bit fields, no compiler padding.
 */
struct __attribute__((packed)) RequestCommon {
    uint16_t opcode;    /// Opcode of operation to be performed.
    uint16_t service;   /// ServiceType to invoke for this rpc.
};
/**
 * Header found at the start of every RPC response.
 * Packed: no compiler padding around the status field.
 */
struct __attribute__((packed)) ResponseCommon {
    // Indicates whether the operation succeeded; if not, it explains why.
    Status status;
};
/**
 * Packet layout of the WRITE rpc used by this experiment.
 *
 * Compared with a plain write header, both messages carry an additional
 * `uint64_t timestamp` so that latencies can be measured. The trailing
 * `array*` members are pure padding selected at compile time:
 *   - no macro : default packet size (100 bytes)
 *   - -DP1000  : adds array2 + array3  (112 * 8 + 4 = 900 more bytes)
 *   - -DP10000 : adds array2 + array3 + array4 (900 + 1125 * 8 = 9900 more bytes)
 */
struct Write {
    // RPC identity constants for this message pair.
    static const Opcode opcode = WRITE;
    static const ServiceType service = MASTER_SERVICE;

    // Request as it travels on the wire (packed: no compiler padding).
    struct __attribute__((packed)) Request {
        RequestCommon common;       // opcode / service header
        uint64_t tableId;
        uint64_t key;
        uint64_t timestamp;         // send time, microseconds since the epoch
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        uint32_t length;            // Includes the total size of the
                                    // keysAndValue blob in bytes. These
                                    // follow immediately after this header
        RejectRules rejectRules;
        uint8_t async;
        uint8_t array[15];          // padding only, never interpreted
#ifdef P1000
        uint64_t array2[112];
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array2[112];
        uint8_t array3[4];
        uint64_t array4[1125];
#endif
    };

    // Response as it travels on the wire (packed: no compiler padding).
    struct __attribute__((packed)) Response {
        ResponseCommon common;      // status header
        uint64_t tableId;
        uint64_t key;
        uint64_t version;
        uint64_t timestamp;         // compared with the receive time for latency
        uint64_t array[8];          // padding only, never interpreted
#ifdef P1000
        uint64_t array2[112];
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array2[112];
        uint8_t array3[4];
        uint64_t array4[1125];
#endif
    };
};
int
main
(
int
argc
,
char
const
*
argv
[])
{
// printf("check\n");
// int bypass_offload = atoi(argv[1]); // first commandline argument, if 0,
// //then the code works with SmartNIC offload,
// // else it bypasses the offload
// int proportion = atoi(argv[2]); // second commandline argument, determines what
// //proportion of writes is inconsistent
// // (if proportion = x, then the
// // inconsistent to consistent write ratio is 1:x-1)
int
sock
=
0
,
valread
;
struct
sockaddr_in
serv_addr
;
char
*
hello
=
"Client preprocessing done"
;
char
buffer1
[
1024
]
=
{
0
};
if
((
sock
=
socket
(
AF_INET
,
SOCK_STREAM
,
0
))
<
0
)
{
printf
(
"
\n
Socket creation error
\n
"
);
return
-
1
;
}
serv_addr
.
sin_family
=
AF_INET
;
serv_addr
.
sin_port
=
htons
(
PORT
);
// struct timeval tv; // used in case very large packets show abnormal behaviour
// tv.tv_sec = 0;
// tv.tv_usec = 3000;
// if(setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof tv)<0)
// {
// printf("\nsetsockopt failed \n");
// return -1;
// }
// Convert IPv4 and IPv6 addresses from text to binary form
// if(bypass_offload)
// {
// if(inet_pton(AF_INET, "192.168.220.60", &serv_addr.sin_addr)<=0)
// {
// printf("\nInvalid address/ Address not supported \n");
// return -1;
// }
// }
// else
// {
// if(inet_pton(AF_INET, "192.168.220.35", &serv_addr.sin_addr)<=0)
// {
// printf("\nInvalid address/ Address not supported \n");
// return -1;
// }
// }
if
(
inet_pton
(
AF_INET
,
"10.129.2.181"
,
&
serv_addr
.
sin_addr
)
<=
0
)
//192.168.200.21//10.129.2.181
{
printf
(
"
\n
Invalid address/ Address not supported
\n
"
);
return
-
1
;
}
if
(
connect
(
sock
,
(
struct
sockaddr
*
)
&
serv_addr
,
sizeof
(
serv_addr
))
<
0
)
{
printf
(
"
\n
Connection Failed
\n
"
);
return
-
1
;
}
// This part of the code synchronises the programs running on the master, client and the NIC
// it ensures that setup delays do not affect the experiments
// valread = read( sock , buffer1, 1024);
// if(valread>0)
// {
// printf("%s\n",buffer1 );
// memset(&buffer1[0], 0, sizeof(buffer1));
// }
// if( send(sock, hello, strlen(hello), 0) != strlen(hello) )
// {
// perror("send");
// }
// valread = read( sock , buffer1, 1024);
// if(valread>0)
// {
// printf("%s\n",buffer1 );
// memset(&buffer1[0], 0, sizeof(buffer1));
// }
int
num_replies
=
0
;
static
char
buffer
[
2
*
M
]
=
{
0
};
// the main body of the client begins here
struct
Write
::
Request
w
;
// The write request to be sent is generated in this struct
chrono
::
microseconds
msStart
=
chrono
::
duration_cast
<
chrono
::
milliseconds
>
(
chrono
::
high_resolution_clock
::
now
().
time_since_epoch
());
int
i
=
0
;
int
success
=
0
;
// These parameters will measure the number of success
int
failure
=
0
;
// failure
int
error
=
0
;
// and error responses respectively
for
(
auto
start
=
std
::
chrono
::
steady_clock
::
now
(),
now
=
start
;
now
<
start
+
std
::
chrono
::
seconds
{
10
};
now
=
std
::
chrono
::
steady_clock
::
now
())
{
//the experiment runs for 60 seconds by default
i
++
;
w
.
tableId
=
1
;
// request workload is synchronised with master
// if(proportion>0) //determines proportion of inconsistent writes
// {
// if(i%proportion==0)
// {
// w.key=0;
// }
// else
// {
// w.key=i;
// }
// }
// else
// {
w
.
key
=
i
;
// printf("%" PRIu64 "\n",w.key);
// }
//setting the relevant parameters of Write::Request packet
w
.
common
.
opcode
=
WRITE
;
w
.
rejectRules
.
givenVersion
=
1
;
w
.
rejectRules
.
versionNeGiven
=
1
;
//Timestamp debugging
chrono
::
microseconds
ms
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
chrono
::
system_clock
::
now
().
time_since_epoch
());
uint64_t
ts1
=
ms
.
count
();
D
(
printf
(
"Client sent Timestamp1:%"
PRIu64
"
\n
"
,
ts1
));
//setting the timestamp of Write::Request packet
w
.
timestamp
=
ms
.
count
();
//sending the packet
send
(
sock
,
&
w
,
sizeof
(
struct
Write
::
Request
),
0
);
//Debugging
D
(
printf
(
"Client sent request Timestamp:%"
PRIu64
"
\n
"
,
w
.
timestamp
));
num_replies
++
;
D
(
printf
(
"Write::Request message sent
\n
"
));
// since this client runs in interactive mode, it waits for each response before generating new requests
int
n
=
0
;
n
=
read
(
sock
,
buffer
,
2
*
M
);
if
(
n
>
0
)
{
// The write response received is stored in this struct
struct
Write
::
Response
wr
;
//copying the contents of the buffer into the struct
memcpy
(
&
wr
,
buffer
,
sizeof
(
wr
));
//storing paramters for debugging purposes
int
stat
=
wr
.
common
.
status
;
int
table
=
wr
.
tableId
;
int
key
=
wr
.
key
;
int
vers
=
wr
.
version
;
uint64_t
ts
=
wr
.
timestamp
;
chrono
::
microseconds
ms
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
chrono
::
system_clock
::
now
().
time_since_epoch
());
uint64_t
ts2
=
ms
.
count
();
D
(
printf
(
"Client received Timestamp1:%"
PRIu64
"
\n
"
,
ts
));
D
(
printf
(
"Client current Timestamp:%"
PRIu64
"
\n
"
,
ts2
));
// difference in timestamps == RTT for this packet
uint64_t
tdelta
=
ts2
-
ts
;
D
(
printf
(
"Client received response Table:%d Key:%d Version:%d Status:%d Timestamp:%"
PRIu64
"Time Delta:%"
PRIu64
"
\n
"
,
table
,
key
,
vers
,
stat
,
ts
,
tdelta
));
//STATUS_WRONG_VERSION is used for termination packets while debugging
if
(
stat
==
STATUS_WRONG_VERSION
||
stat
==
STATUS_MAX_VALUE
)
{
D
(
printf
(
"Failure received:%d
\n
"
,
stat
));
failure
++
;
latv
.
push_back
(
tdelta
);
}
//packet is all zeroes, hence error
else
if
(
ts
==
0
)
{
D
(
printf
(
"Error received:%d
\n
"
,
stat
));
error
++
;
}
else
if
(
stat
==
STATUS_OK
)
{
D
(
printf
(
"Success received:%d
\n
"
,
stat
));
success
++
;
latv
.
push_back
(
tdelta
);
}
else
{
D
(
printf
(
"Error received:%d
\n
"
,
stat
));
error
++
;
}
//Used to generate termination packets while debugging
// if(tdelta>1000000)
// {
// w.common.opcode=ILLEGAL_RPC_TYPE;
// send(sock , &w , sizeof(struct Write::Request), 0 );
// break;
// }
}
memset
(
&
buffer
[
0
],
0
,
sizeof
(
buffer
));
}
//Printing experiment results to console
//and storing the latency of each packet in latencies_unsorted.txt, latencies.txt
chrono
::
microseconds
msEnd
=
chrono
::
duration_cast
<
chrono
::
milliseconds
>
(
chrono
::
high_resolution_clock
::
now
().
time_since_epoch
());
cout
<<
"Time taken:"
<<
msEnd
.
count
()
-
msStart
.
count
()
<<
"
\n
"
;
cout
<<
"Packets Received: "
<<
i
<<
"
\n
"
<<
"Success Received: "
<<
success
<<
"
\n
"
<<
"Failure Received: "
<<
failure
<<
"
\n
"
<<
"Error Received: "
<<
error
<<
"
\n
"
;
std
::
ofstream
outFileUnsorted
(
"latencies_unsorted.txt"
);
for
(
const
auto
&
e
:
latv
)
outFileUnsorted
<<
e
<<
"
\n
"
;
std
::
sort
(
latv
.
begin
(),
latv
.
end
());
std
::
ofstream
outFile
(
"latencies.txt"
);
for
(
const
auto
&
e
:
latv
)
outFile
<<
e
<<
"
\n
"
;
auto
nth
=
latv
.
begin
()
+
(
99
*
latv
.
size
())
/
100
;
std
::
nth_element
(
latv
.
begin
(),
nth
,
latv
.
end
());
cout
<<
"Average Latency: "
<<
1.0
*
std
::
accumulate
(
latv
.
begin
(),
latv
.
end
()
-
1
,
0LL
)
/
std
::
distance
(
latv
.
begin
(),
latv
.
end
()
-
1
)
<<
"
\n
"
;
cout
<<
"Tail Latency (99%): "
<<
1.0
*
std
::
accumulate
(
nth
,
latv
.
end
()
-
1
,
0LL
)
/
std
::
distance
(
nth
,
latv
.
end
()
-
1
)
<<
"
\n
"
;
return
0
;
}
Smit_MTP_RamCloud_Replication_Offload/final_shashank/master.cpp
0 → 100644
View file @
d2d47b86
// Threaded socket server - accepting multiple clients concurrently, by creating
// a new thread for each connecting client.
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include <unistd.h>
#include <stdio.h>
#include <iostream>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <thread>
#include <vector>
#include <mutex>
#include <chrono>
#include <random>
#include <unordered_map>
#include <inttypes.h>
#include <sys/time.h>
#include "utils.h"
// Size helpers. Expansions that contain an operator are parenthesised so the
// macro behaves as a single value inside a larger expression
// (e.g. `x / M` divides by 1048576 rather than expanding to `x / K * 1024`).
#define K 1024
#define M (K * 1024)
#define G (M * 1024)

// in order to turn on debugging, compile with -DDEBUG flag
// D(x) evaluates x in debug builds and expands to an empty statement otherwise,
// so the arguments of a disabled trace are never evaluated.
#ifdef DEBUG
#define D(x) (x)
#else
#define D(x) do{}while(0)
#endif
using
namespace
std
;
// File-scope descriptor set.
// NOTE(review): presumably the read set handed to select(); its use lies
// outside this excerpt, confirm.
fd_set readfds;

//The hashmap on NIC that stores the most recent version number of each object that is created/updated
std::unordered_map<std::string, uint64_t> m;

// NOTE(review): by its name, the socket descriptor of the connection to the
// backup; its use lies outside this excerpt, confirm.
int backupsockfd;
/**
* This enum provides symbolic names for the status values returned
* to applications by RAMCloud operations.
*
* 0 means success; anything else means that an error occurred.
* Not all status values can be returned by all operations.
*/
typedef
enum
Status
{
/// Default return value when an operation was successful.
STATUS_OK
=
0
,
/// Indicates that the server does not know about (and is not responsible
/// for) a given tablet, but that it may exist elsewhere in the system.
/// When it's possible that the tablet exists on another server, this
/// status should be returned (in preference to the definitive
/// TABLE_DOESNT_EXIST).
STATUS_UNKNOWN_TABLET
=
1
,
/// Indicates that a table does not exist anywhere in the system. At present
/// only the coordinator can say with certainly that a table does not exist.
STATUS_TABLE_DOESNT_EXIST
=
2
,
/// Indicates that an object does not exist anywhere in the system. Note
/// that unlike with tables there is no UNKNOWN_OBJECT status. This is just
/// because servers will reject operations on objects in unknown tables with
/// a table-related status. If they own a particular tablet, then they can
/// say with certainty if an object exists there or not.
STATUS_OBJECT_DOESNT_EXIST
=
3
,
STATUS_OBJECT_EXISTS
=
4
,
STATUS_WRONG_VERSION
=
5
,
STATUS_NO_TABLE_SPACE
=
6
,
STATUS_MESSAGE_TOO_SHORT
=
7
,
STATUS_UNIMPLEMENTED_REQUEST
=
8
,
STATUS_REQUEST_FORMAT_ERROR
=
9
,
STATUS_RESPONSE_FORMAT_ERROR
=
10
,
STATUS_COULDNT_CONNECT
=
11
,
STATUS_BACKUP_BAD_SEGMENT_ID
=
12
,
/// Returned by backups when they cannot (or do not wish to) allocate
/// space for a segment replica.
STATUS_BACKUP_OPEN_REJECTED
=
13
,
STATUS_BACKUP_SEGMENT_OVERFLOW
=
14
,
STATUS_BACKUP_MALFORMED_SEGMENT
=
15
,
STATUS_SEGMENT_RECOVERY_FAILED
=
16
,
/// Indicates that a server is not prepared to handle a request at
/// the present time; the caller should retry at a later time. This
/// status can be returned under many different situations, such as
/// (a) the server is out of resources to execute the request, or
/// (b) the server is not sure it actually has authority to execute
/// the request, and is checking with the coordinator.
STATUS_RETRY
=
17
,
/// Indicates that the RPC requested an unknown service.
STATUS_SERVICE_NOT_AVAILABLE
=
18
,
STATUS_TIMEOUT
=
19
,
/// Indicates that server to which an RPC is directed either never existed,
/// has come and gone, or is currently in crashed state. The server is not
/// in a position to respond to RPCs and probably never will be again
/// (unless the id hasn't yet existed; once a server crashes its id will
/// never be reused).
STATUS_SERVER_NOT_UP
=
20
,
STATUS_INTERNAL_ERROR
=
21
,
/// Indicates that the object chosen for an operation does not match the
/// associated requirements. Therefore the chosen object is invalid.
STATUS_INVALID_OBJECT
=
22
,
/// Indicates that a tablet does not exist. This status is of relevance
/// when doing split or merge operations on tablets are executed.
STATUS_TABLET_DOESNT_EXIST
=
23
,
/// Indicates that the logic to partition tablets was invoked without a
/// preceeding invocation to start reading replicas off of disk.
STATUS_PARTITION_BEFORE_READ
=
24
,
/// Indicates that an RPC was intended for a particular server id, but
/// was actually sent to a different server id.
STATUS_WRONG_SERVER
=
25
,
/// Indicates that the server sending an RPC is not present in the
/// server list of the RPC recipient. Used to help servers discover
/// that they are zombies (the rest of the cluster thinks a zombie
/// is dead, but the zombie thinks it's still alive), so they don't
/// continue servicing requests when other servers have already
/// taken over their tablets. See "Zombies" in designNotes.
STATUS_CALLER_NOT_IN_CLUSTER
=
26
,
/// Indicates that a single request was too big to fit in an rpc and
/// thus could not be sent/carried out.
STATUS_REQUEST_TOO_LARGE
=
27
,
/// Indicates that the server does not know about (and is not responsible
/// for) a given indexlet, but that it may exist elsewhere in the system.
/// When it's possible that the indexlet exists on another server, this
/// status should be returned (in preference to the definitive
/// INDEX_DOESNT_EXIST).
STATUS_UNKNOWN_INDEXLET
=
28
,
/// Indicates that an index does not exist anywhere in the system. At
/// present only the coordinator can say with certainty that an index does
/// not exist.
STATUS_INDEX_DOESNT_EXIST
=
29
,
/// Indicates that a parameter provided by the client is invalid (for
/// example: it is outside allowed bounds).
STATUS_INVALID_PARAMETER
=
30
,
/// Indicates that client already received the result of the rpc.
/// It does not make sense to execute the RPC again. Most likely cause
/// is a delayed network packet.
STATUS_STALE_RPC
=
31
,
/// Indicates that the lease of a client is expired on the coordinator.
/// Master refused to execute the RPC with expired lease.
STATUS_EXPIRED_LEASE
=
32
,
/// Indicates that a client tried to perform transaction operations after
/// the transaction commit had already started.
STATUS_TX_OP_AFTER_COMMIT
=
33
,
STATUS_MAX_VALUE
=
33
,
// Note: if you add a new status value you must make the following
// additional updates:
// * Modify STATUS_MAX_VALUE to have a value equal to the largest
// defined status value, and make sure its definition is the last one
// in the list. STATUS_MAX_VALUE is used primarily for testing.
// * Add new entries in the tables "messages" and "symbols" in Status.cc.
// * Add a new exception class to ClientException.h
// * Add a new "case" to ClientException::throwException to map from
// the status value to a status-specific ClientException subclass.
// * In the Java bindings, add a static class for the exception to
// ClientException.java
// * Add a case for the status of the exception to throw the exception in
// ClientException.java
// * Add the exception to the Status enum in Status.java, making
// sure the status is in the correct position corresponding to its status
// code.
}
Status
;
/**
 * Conditions under which a conditional operation must be aborted with an
 * error instead of being applied.
 *
 * RejectRules make an update depend on the current state of the object,
 * e.g. "overwrite only if the value has not changed since I last read it".
 * An operation that carries a RejectRules object is aborted as soon as any
 * one of the following holds:
 *  - doesntExist is nonzero and the object does not exist;
 *  - exists is nonzero and the object does exist;
 *  - versionLeGiven is nonzero and the object exists with a version less
 *    than or equal to givenVersion;
 *  - versionNeGiven is nonzero and the object exists with a version
 *    different from givenVersion.
 *
 * The struct is packed: 8 + 4 * 1 = 12 bytes, no padding.
 */
struct RejectRules {
    uint64_t givenVersion;    ///< Version the two version rules compare with.
    uint8_t  doesntExist;     ///< Nonzero: reject if the object is missing.
    uint8_t  exists;          ///< Nonzero: reject if the object is present.
    uint8_t  versionLeGiven;  ///< Nonzero: reject if version <= givenVersion.
    uint8_t  versionNeGiven;  ///< Nonzero: reject if version != givenVersion.
} __attribute__((packed));
/**
 * Identifies the service that handles a given rpc.
 * An rpc is addressed to exactly one service; see ServiceMask for
 * situations dealing with sets of services on a particular Server.
 *
 * Values are assigned implicitly, starting at 0 in declaration order.
 */
enum ServiceType {
    MASTER_SERVICE,
    BACKUP_SERVICE,
    COORDINATOR_SERVICE,
    ADMIN_SERVICE,
    INVALID_SERVICE,  // One higher than the max.
};
/**
 * Lease information carried by linearizable RPCs so the receiver can check
 * whether or not the RPC may be processed.
 *
 * Packed: three 64-bit fields, 24 bytes, no padding.
 */
struct ClientLease {
    /// A cluster unique id for a specific lease.
    /// 0 is used to indicate invalid or expired id.
    uint64_t leaseId;

    /// Cluster time after which the lease may have become invalid.
    uint64_t leaseExpiration;

    /// Cluster time when this lease information was provided by the
    /// coordinator.
    uint64_t timestamp;
} __attribute__((packed));
/**
 * Choices for the "opcode" field in RPC headers; each value selects one
 * operation. Each RAMCloud service implements a subset of these
 * operations. If you change this table you must also reflect the changes
 * in the following locations:
 *  - The method opcodeSymbol in WireFormat.cc.
 *  - WireFormatTest.cc's out-of-range test, if ILLEGAL_RPC_TYPE was changed.
 *  - You may need to modify the "callees" table in scripts/genLevels.py,
 *    which keeps track of which RPCs invoke which other RPCs.
 *
 * Every value is spelled out because it travels on the wire. The numbering
 * starts at 7 and skips 27, 30, 34, 37, 38, 41 and 54.
 */
enum Opcode {
    PING                             = 7,
    PROXY_PING                       = 8,
    KILL                             = 9,
    CREATE_TABLE                     = 10,
    GET_TABLE_ID                     = 11,
    DROP_TABLE                       = 12,
    READ                             = 13,
    WRITE                            = 14,
    REMOVE                           = 15,
    ENLIST_SERVER                    = 16,
    GET_SERVER_LIST                  = 17,
    GET_TABLE_CONFIG                 = 18,
    RECOVER                          = 19,
    HINT_SERVER_CRASHED              = 20,
    RECOVERY_MASTER_FINISHED         = 21,
    ENUMERATE                        = 22,
    SET_MASTER_RECOVERY_INFO         = 23,
    FILL_WITH_TEST_DATA              = 24,
    MULTI_OP                         = 25,
    GET_METRICS                      = 26,
    BACKUP_FREE                      = 28,
    BACKUP_GETRECOVERYDATA           = 29,
    BACKUP_STARTREADINGDATA          = 31,
    BACKUP_WRITE                     = 32,
    BACKUP_RECOVERYCOMPLETE          = 33,
    UPDATE_SERVER_LIST               = 35,
    BACKUP_STARTPARTITION            = 36,
    DROP_TABLET_OWNERSHIP            = 39,
    TAKE_TABLET_OWNERSHIP            = 40,
    GET_HEAD_OF_LOG                  = 42,
    INCREMENT                        = 43,
    PREP_FOR_MIGRATION               = 44,
    RECEIVE_MIGRATION_DATA           = 45,
    REASSIGN_TABLET_OWNERSHIP        = 46,
    MIGRATE_TABLET                   = 47,
    IS_REPLICA_NEEDED                = 48,
    SPLIT_TABLET                     = 49,
    GET_SERVER_STATISTICS            = 50,
    SET_RUNTIME_OPTION               = 51,
    GET_SERVER_CONFIG                = 52,
    GET_BACKUP_CONFIG                = 53,
    GET_MASTER_CONFIG                = 55,
    GET_LOG_METRICS                  = 56,
    VERIFY_MEMBERSHIP                = 57,
    GET_RUNTIME_OPTION               = 58,
    GET_LEASE_INFO                   = 59,
    RENEW_LEASE                      = 60,
    SERVER_CONTROL                   = 61,
    SERVER_CONTROL_ALL               = 62,
    GET_SERVER_ID                    = 63,
    READ_KEYS_AND_VALUE              = 64,
    LOOKUP_INDEX_KEYS                = 65,
    READ_HASHES                      = 66,
    INSERT_INDEX_ENTRY               = 67,
    REMOVE_INDEX_ENTRY               = 68,
    CREATE_INDEX                     = 69,
    DROP_INDEX                       = 70,
    DROP_INDEXLET_OWNERSHIP          = 71,
    TAKE_INDEXLET_OWNERSHIP          = 72,
    PREP_FOR_INDEXLET_MIGRATION      = 73,
    SPLIT_AND_MIGRATE_INDEXLET       = 74,
    COORD_SPLIT_AND_MIGRATE_INDEXLET = 75,
    TX_DECISION                      = 76,
    TX_PREPARE                       = 77,
    TX_REQUEST_ABORT                 = 78,
    TX_HINT_FAILED                   = 79,
    ECHO                             = 80,
    ILLEGAL_RPC_TYPE                 = 81,  // 1 + the highest legitimate Opcode
};
/**
 * Header found at the very beginning of every RPC request.
 * Packed: two 16-bit fields, 4 bytes in total.
 */
struct RequestCommon {
    uint16_t opcode;   /// Opcode of operation to be performed.
    uint16_t service;  /// ServiceType to invoke for this rpc.
} __attribute__((packed));
/**
 * Header found at the very beginning of every RPC response.
 */
struct ResponseCommon {
    // Indicates whether the operation succeeded; if not, it explains why.
    Status status;
} __attribute__((packed));
/**
 * Wire format of the WRITE rpc (opcode WRITE, handled by MASTER_SERVICE).
 *
 * Two things distinguish these messages from a plain write header:
 *  - a `timestamp` field, added to help measure latencies;
 *  - trailing padding arrays that bring the packet to a size selected at
 *    compile time with a command-line macro:
 *      (no macro)  Request is   100 bytes (the default)
 *      -DP1000     Request is  1000 bytes
 *      -DP10000    Request is 10000 bytes
 *    Response has the same sizes provided sizeof(Status) is 4
 *    (NOTE(review): the enum's size is compiler-defined - confirm).
 *
 * The padding is declared once per size tier, so defining both P1000 and
 * P10000 no longer redeclares array2/array3; the larger size wins. The
 * layout for each single macro is unchanged.
 */
struct Write {
    static const Opcode opcode = WRITE;
    static const ServiceType service = MASTER_SERVICE;

    struct Request {
        RequestCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t timestamp;       // Latency-measurement timestamp.
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        uint32_t length;          // Includes the total size of the
                                  // keysAndValue blob in bytes. These
                                  // follow immediately after this header.
        RejectRules rejectRules;
        uint8_t async;
        uint8_t array[15];        // Pads the default request to 100 bytes.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];     // +896 bytes
        uint8_t array3[4];        // +4 bytes   -> 1000 bytes
#endif
#ifdef P10000
        uint64_t array4[1125];    // +9000 bytes -> 10000 bytes
#endif
    } __attribute__((packed));

    struct Response {
        ResponseCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t version;
        uint64_t timestamp;       // Latency-measurement timestamp.
        uint64_t array[8];        // Padding for the default response size.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];
#endif
    } __attribute__((packed));
};
/**
 * Wire format of the BACKUP_WRITE rpc (opcode BACKUP_WRITE, handled by
 * BACKUP_SERVICE). Request and Response have the same fields, in the same
 * order, as Write::Request and Write::Response, including the `timestamp`
 * field and the compile-time padding:
 *      (no macro)  Request is   100 bytes
 *      -DP1000     Request is  1000 bytes
 *      -DP10000    Request is 10000 bytes
 *
 * The padding is declared once per size tier, so defining both P1000 and
 * P10000 no longer redeclares array2/array3; the larger size wins. The
 * layout for each single macro is unchanged.
 */
struct Backup_Write {
    static const Opcode opcode = BACKUP_WRITE;
    static const ServiceType service = BACKUP_SERVICE;

    struct Request {
        RequestCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t timestamp;       // Latency-measurement timestamp.
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        uint32_t length;          // Includes the total size of the
                                  // keysAndValue blob in bytes. These
                                  // follow immediately after this header.
        RejectRules rejectRules;
        uint8_t async;
        uint8_t array[15];        // Pads the default request to 100 bytes.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];     // +896 bytes
        uint8_t array3[4];        // +4 bytes   -> 1000 bytes
#endif
#ifdef P10000
        uint64_t array4[1125];    // +9000 bytes -> 10000 bytes
#endif
    } __attribute__((packed));

    struct Response {
        ResponseCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t version;
        uint64_t timestamp;       // Latency-measurement timestamp.
        uint64_t array[8];        // Padding for the default response size.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];
#endif
    } __attribute__((packed));
};
// Argument block handed to a connection thread. It is heap-allocated by
// main() and freed by server_thread() once the socket has been copied out.
typedef struct {
    int sockfd;  // Accepted client socket the thread must serve.
} thread_config_t;
// Two-state marker for message processing.
// NOTE(review): nothing in the visible part of this file references it.
typedef enum {
    WAIT_FOR_MSG,
    IN_MSG
} ProcessingState;
void
serve_connection
(
int
sockfd
)
{
if
(
send
(
sockfd
,
"*"
,
1
,
0
)
<
1
)
{
perror_die
(
"send"
);
}
//////////////////////////////////////////////////////
static
char
buffer
[
2
*
M
]
=
{
0
};
static
char
buffer2
[
2
*
M
]
=
{
0
};
while
(
1
)
{
// The write request received is stored in this struct
int
valread
=
0
;
if
((
valread
=
read
(
sockfd
,
buffer
,
2
*
M
))
==
0
)
{
printf
(
"Some business1"
);
break
;
}
// printf("III\n");
struct
Write
::
Request
w1
;
//copying the contents of the buffer into the struct
memcpy
(
&
w1
,
buffer
,
sizeof
(
w1
));
int
a
=
w1
.
common
.
opcode
;
D
(
printf
(
"Opcode:%d
\n
"
,
a
));
//comparing the opcode to ensure that only write packets are processed by the offload
//and all other packets pass through
if
(
a
==
WRITE
)
{
struct
Write
::
Request
w
;
memcpy
(
&
w
,
buffer
,
sizeof
(
w
));
//debugging
D
(
printf
(
"tableId:%lu
\n
"
,
w
.
tableId
));
D
(
printf
(
"key:%lu
\n
"
,
w
.
key
));
//Check the reject rules to confirm
// whether the write packet was for an atomic operation
struct
Write
::
Request
br
;
// br.common.status=STATUS_OK;
// br.rejectRules.givenVersion=0;
// br.tableId=w.tableId;
// br.key=w.key;
br
.
timestamp
=
w
.
timestamp
;
//storing paramters for debugging purposes
// int stat = br.common.status;
// uint64_t vers = br.rejectRules.givenVersion;
// int table = br.tableId;
// int key = br.key;
uint64_t
ts
=
br
.
timestamp
;
D
(
printf
(
"Master sent response Timestamp:%"
PRIu64
"
\n
"
,
ts
));
chrono
::
microseconds
ms
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
chrono
::
system_clock
::
now
().
time_since_epoch
());
uint64_t
ts2
=
ms
.
count
();
D
(
printf
(
"Master sent response at Timestamp:%"
PRIu64
"
\n
"
,
ts2
));
send
(
backupsockfd
,
&
w
,
sizeof
(
struct
Write
::
Request
),
0
);
int
valread
=
0
;
if
((
valread
=
read
(
backupsockfd
,
buffer2
,
2
*
M
))
==
0
)
{
printf
(
"Some business2"
);
}
else
{
// The write response received is stored in this struct
struct
Write
::
Response
wr
;
memcpy
(
&
wr
,
buffer2
,
sizeof
(
wr
));
//debugging
uint64_t
ts
=
wr
.
timestamp
;
D
(
printf
(
"Server received response Timestamp:%"
PRIu64
"
\n
"
,
ts
));
chrono
::
microseconds
ms
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
chrono
::
system_clock
::
now
().
time_since_epoch
());
uint64_t
ts2
=
ms
.
count
();
D
(
printf
(
"Server received response at Timestamp:%"
PRIu64
"
\n
"
,
ts2
));
//send response to client
send
(
sockfd
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
}
}
}
////////////////////////////////
close
(
sockfd
);
}
void
*
server_thread
(
void
*
arg
)
{
thread_config_t
*
config
=
(
thread_config_t
*
)
arg
;
int
sockfd
=
config
->
sockfd
;
free
(
config
);
// This cast will work for Linux, but in general casting pthread_id to an
// integral type isn't portable.
unsigned
long
id
=
(
unsigned
long
)
pthread_self
();
printf
(
"Thread %lu created to handle connection with socket %d
\n
"
,
id
,
sockfd
);
serve_connection
(
sockfd
);
printf
(
"Thread %lu done
\n
"
,
id
);
return
0
;
}
int
main
(
int
argc
,
char
**
argv
)
{
setvbuf
(
stdout
,
NULL
,
_IONBF
,
0
);
int
portnum
=
9090
;
int
backupnum
=
9095
;
if
(
argc
>=
2
)
{
portnum
=
atoi
(
argv
[
1
]);
}
printf
(
"Serving on port %d
\n
"
,
portnum
);
fflush
(
stdout
);
int
backupfd
=
listen_inet_socket
(
backupnum
);
struct
sockaddr_in
backup_peer_addr
;
socklen_t
backup_peer_addr_len
=
sizeof
(
backup_peer_addr
);
// int backupsockfd = // declared global so that all threads have access.
backupsockfd
=
accept
(
backupfd
,
(
struct
sockaddr
*
)
&
backup_peer_addr
,
&
backup_peer_addr_len
);
if
(
backupsockfd
<
0
)
{
perror_die
(
"ERROR on accept"
);
}
report_backup_connected
(
&
backup_peer_addr
,
backup_peer_addr_len
);
int
sockfd
=
listen_inet_socket
(
portnum
);
while
(
1
)
{
struct
sockaddr_in
peer_addr
;
socklen_t
peer_addr_len
=
sizeof
(
peer_addr
);
int
newsockfd
=
accept
(
sockfd
,
(
struct
sockaddr
*
)
&
peer_addr
,
&
peer_addr_len
);
if
(
newsockfd
<
0
)
{
perror_die
(
"ERROR on accept"
);
}
report_peer_connected
(
&
peer_addr
,
peer_addr_len
);
pthread_t
the_thread
;
thread_config_t
*
config
=
(
thread_config_t
*
)
malloc
(
sizeof
(
*
config
));
if
(
!
config
)
{
die
(
"OOM"
);
}
config
->
sockfd
=
newsockfd
;
pthread_create
(
&
the_thread
,
NULL
,
server_thread
,
config
);
// Detach the thread - when it's done, its resources will be cleaned up.
// Since the main thread lives forever, it will outlive the serving threads.
pthread_detach
(
the_thread
);
}
return
0
;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/final_shashank/nic.cpp
0 → 100644
View file @
d2d47b86
// Threaded socket server - accepting multiple clients concurrently, by creating
// a new thread for each connecting client.
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include <unistd.h>
#include <stdio.h>
#include <iostream>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <thread>
#include <vector>
#include <mutex>
#include <chrono>
#include <random>
#include <unordered_map>
#include <inttypes.h>
#include <sys/time.h>
#include "utils.h"
// Byte-size units (K = 2^10, M = 2^20, G = 2^30). The expansions are
// parenthesized so the macros behave as single values inside any
// expression: without the parentheses `G / M` or `x % M` would bind to the
// first factor of the expansion only. `2 * M` evaluates as before.
#define K 1024
#define M (K * 1024)
#define G (M * 1024)
// D(x) wraps debug-only statements; to turn on debugging, compile with the
// -DDEBUG flag. Without it D(x) expands to an empty statement and `x` is
// never evaluated.
#ifdef DEBUG
#define D(x) (x)
#else
#define D(x) do{}while(0)
#endif
using
namespace
std
;
// File-scope state shared by every thread of the process.

// NOTE(review): not referenced by any code visible in this part of the
// file - confirm whether it is still needed.
fd_set readfds;

// The hashmap on the NIC that stores the most recent version number of each
// object that is created/updated.
// NOTE(review): not referenced by any code visible in this part of the
// file - confirm whether it is still needed.
unordered_map<string, uint64_t> m;

// Connected socket to the backup; written to and read from by
// backup_thread_fn().
int backupsockfd;

// Connected socket to the master; written to and read from by
// master_thread_fn().
int mastersockfd;
/**
 * Symbolic names for the status values that RAMCloud operations return to
 * applications.
 *
 * 0 means success; anything else means that an error occurred.
 * Not all status values can be returned by all operations.
 */
typedef enum Status {
    /// Default return value when an operation was successful.
    STATUS_OK = 0,

    /// The server does not know about (and is not responsible for) a given
    /// tablet, but it may exist elsewhere in the system. When it's possible
    /// that the tablet exists on another server, this status should be
    /// returned (in preference to the definitive TABLE_DOESNT_EXIST).
    STATUS_UNKNOWN_TABLET = 1,

    /// A table does not exist anywhere in the system. At present only the
    /// coordinator can say with certainty that a table does not exist.
    STATUS_TABLE_DOESNT_EXIST = 2,

    /// An object does not exist anywhere in the system. Note that unlike
    /// with tables there is no UNKNOWN_OBJECT status. This is just because
    /// servers will reject operations on objects in unknown tables with a
    /// table-related status. If they own a particular tablet, then they can
    /// say with certainty if an object exists there or not.
    STATUS_OBJECT_DOESNT_EXIST = 3,

    STATUS_OBJECT_EXISTS = 4,
    STATUS_WRONG_VERSION = 5,
    STATUS_NO_TABLE_SPACE = 6,
    STATUS_MESSAGE_TOO_SHORT = 7,
    STATUS_UNIMPLEMENTED_REQUEST = 8,
    STATUS_REQUEST_FORMAT_ERROR = 9,
    STATUS_RESPONSE_FORMAT_ERROR = 10,
    STATUS_COULDNT_CONNECT = 11,
    STATUS_BACKUP_BAD_SEGMENT_ID = 12,

    /// Returned by backups when they cannot (or do not wish to) allocate
    /// space for a segment replica.
    STATUS_BACKUP_OPEN_REJECTED = 13,

    STATUS_BACKUP_SEGMENT_OVERFLOW = 14,
    STATUS_BACKUP_MALFORMED_SEGMENT = 15,
    STATUS_SEGMENT_RECOVERY_FAILED = 16,

    /// A server is not prepared to handle a request at the present time;
    /// the caller should retry at a later time. This status can be returned
    /// under many different situations, such as (a) the server is out of
    /// resources to execute the request, or (b) the server is not sure it
    /// actually has authority to execute the request, and is checking with
    /// the coordinator.
    STATUS_RETRY = 17,

    /// The RPC requested an unknown service.
    STATUS_SERVICE_NOT_AVAILABLE = 18,

    STATUS_TIMEOUT = 19,

    /// The server to which an RPC is directed either never existed, has
    /// come and gone, or is currently in crashed state. The server is not
    /// in a position to respond to RPCs and probably never will be again
    /// (unless the id hasn't yet existed; once a server crashes its id will
    /// never be reused).
    STATUS_SERVER_NOT_UP = 20,

    STATUS_INTERNAL_ERROR = 21,

    /// The object chosen for an operation does not match the associated
    /// requirements. Therefore the chosen object is invalid.
    STATUS_INVALID_OBJECT = 22,

    /// A tablet does not exist. This status is of relevance when split or
    /// merge operations on tablets are executed.
    STATUS_TABLET_DOESNT_EXIST = 23,

    /// The logic to partition tablets was invoked without a preceding
    /// invocation to start reading replicas off of disk.
    STATUS_PARTITION_BEFORE_READ = 24,

    /// An RPC was intended for a particular server id, but was actually
    /// sent to a different server id.
    STATUS_WRONG_SERVER = 25,

    /// The server sending an RPC is not present in the server list of the
    /// RPC recipient. Used to help servers discover that they are zombies
    /// (the rest of the cluster thinks a zombie is dead, but the zombie
    /// thinks it's still alive), so they don't continue servicing requests
    /// when other servers have already taken over their tablets. See
    /// "Zombies" in designNotes.
    STATUS_CALLER_NOT_IN_CLUSTER = 26,

    /// A single request was too big to fit in an rpc and thus could not be
    /// sent/carried out.
    STATUS_REQUEST_TOO_LARGE = 27,

    /// The server does not know about (and is not responsible for) a given
    /// indexlet, but it may exist elsewhere in the system. When it's
    /// possible that the indexlet exists on another server, this status
    /// should be returned (in preference to the definitive
    /// INDEX_DOESNT_EXIST).
    STATUS_UNKNOWN_INDEXLET = 28,

    /// An index does not exist anywhere in the system. At present only the
    /// coordinator can say with certainty that an index does not exist.
    STATUS_INDEX_DOESNT_EXIST = 29,

    /// A parameter provided by the client is invalid (for example: it is
    /// outside allowed bounds).
    STATUS_INVALID_PARAMETER = 30,

    /// The client already received the result of the rpc. It does not make
    /// sense to execute the RPC again. Most likely cause is a delayed
    /// network packet.
    STATUS_STALE_RPC = 31,

    /// The lease of a client is expired on the coordinator. Master refused
    /// to execute the RPC with expired lease.
    STATUS_EXPIRED_LEASE = 32,

    /// A client tried to perform transaction operations after the
    /// transaction commit had already started.
    STATUS_TX_OP_AFTER_COMMIT = 33,

    STATUS_MAX_VALUE = 33,

    // Note: if you add a new status value you must make the following
    // additional updates:
    // * Modify STATUS_MAX_VALUE to have a value equal to the largest
    //   defined status value, and make sure its definition is the last one
    //   in the list. STATUS_MAX_VALUE is used primarily for testing.
    // * Add new entries in the tables "messages" and "symbols" in Status.cc.
    // * Add a new exception class to ClientException.h
    // * Add a new "case" to ClientException::throwException to map from
    //   the status value to a status-specific ClientException subclass.
    // * In the Java bindings, add a static class for the exception to
    //   ClientException.java
    // * Add a case for the status of the exception to throw the exception
    //   in ClientException.java
    // * Add the exception to the Status enum in Status.java, making
    //   sure the status is in the correct position corresponding to its
    //   status code.
} Status;
/**
 * Conditions under which a conditional operation must be aborted with an
 * error instead of being applied.
 *
 * RejectRules make an update depend on the current state of the object,
 * e.g. "overwrite only if the value has not changed since I last read it".
 * An operation that carries a RejectRules object is aborted as soon as any
 * one of the following holds:
 *  - doesntExist is nonzero and the object does not exist;
 *  - exists is nonzero and the object does exist;
 *  - versionLeGiven is nonzero and the object exists with a version less
 *    than or equal to givenVersion;
 *  - versionNeGiven is nonzero and the object exists with a version
 *    different from givenVersion.
 *
 * The struct is packed: 8 + 4 * 1 = 12 bytes, no padding.
 */
struct RejectRules {
    uint64_t givenVersion;    ///< Version the two version rules compare with.
    uint8_t  doesntExist;     ///< Nonzero: reject if the object is missing.
    uint8_t  exists;          ///< Nonzero: reject if the object is present.
    uint8_t  versionLeGiven;  ///< Nonzero: reject if version <= givenVersion.
    uint8_t  versionNeGiven;  ///< Nonzero: reject if version != givenVersion.
} __attribute__((packed));
/**
 * Identifies the service that handles a given rpc.
 * An rpc is addressed to exactly one service; see ServiceMask for
 * situations dealing with sets of services on a particular Server.
 *
 * Values are assigned implicitly, starting at 0 in declaration order.
 */
enum ServiceType {
    MASTER_SERVICE,
    BACKUP_SERVICE,
    COORDINATOR_SERVICE,
    ADMIN_SERVICE,
    INVALID_SERVICE,  // One higher than the max.
};
/**
 * Lease information carried by linearizable RPCs so the receiver can check
 * whether or not the RPC may be processed.
 *
 * Packed: three 64-bit fields, 24 bytes, no padding.
 */
struct ClientLease {
    /// A cluster unique id for a specific lease.
    /// 0 is used to indicate invalid or expired id.
    uint64_t leaseId;

    /// Cluster time after which the lease may have become invalid.
    uint64_t leaseExpiration;

    /// Cluster time when this lease information was provided by the
    /// coordinator.
    uint64_t timestamp;
} __attribute__((packed));
/**
 * Choices for the "opcode" field in RPC headers; each value selects one
 * operation. Each RAMCloud service implements a subset of these
 * operations. If you change this table you must also reflect the changes
 * in the following locations:
 *  - The method opcodeSymbol in WireFormat.cc.
 *  - WireFormatTest.cc's out-of-range test, if ILLEGAL_RPC_TYPE was changed.
 *  - You may need to modify the "callees" table in scripts/genLevels.py,
 *    which keeps track of which RPCs invoke which other RPCs.
 *
 * Every value is spelled out because it travels on the wire. The numbering
 * starts at 7 and skips 27, 30, 34, 37, 38, 41 and 54.
 */
enum Opcode {
    PING                             = 7,
    PROXY_PING                       = 8,
    KILL                             = 9,
    CREATE_TABLE                     = 10,
    GET_TABLE_ID                     = 11,
    DROP_TABLE                       = 12,
    READ                             = 13,
    WRITE                            = 14,
    REMOVE                           = 15,
    ENLIST_SERVER                    = 16,
    GET_SERVER_LIST                  = 17,
    GET_TABLE_CONFIG                 = 18,
    RECOVER                          = 19,
    HINT_SERVER_CRASHED              = 20,
    RECOVERY_MASTER_FINISHED         = 21,
    ENUMERATE                        = 22,
    SET_MASTER_RECOVERY_INFO         = 23,
    FILL_WITH_TEST_DATA              = 24,
    MULTI_OP                         = 25,
    GET_METRICS                      = 26,
    BACKUP_FREE                      = 28,
    BACKUP_GETRECOVERYDATA           = 29,
    BACKUP_STARTREADINGDATA          = 31,
    BACKUP_WRITE                     = 32,
    BACKUP_RECOVERYCOMPLETE          = 33,
    UPDATE_SERVER_LIST               = 35,
    BACKUP_STARTPARTITION            = 36,
    DROP_TABLET_OWNERSHIP            = 39,
    TAKE_TABLET_OWNERSHIP            = 40,
    GET_HEAD_OF_LOG                  = 42,
    INCREMENT                        = 43,
    PREP_FOR_MIGRATION               = 44,
    RECEIVE_MIGRATION_DATA           = 45,
    REASSIGN_TABLET_OWNERSHIP        = 46,
    MIGRATE_TABLET                   = 47,
    IS_REPLICA_NEEDED                = 48,
    SPLIT_TABLET                     = 49,
    GET_SERVER_STATISTICS            = 50,
    SET_RUNTIME_OPTION               = 51,
    GET_SERVER_CONFIG                = 52,
    GET_BACKUP_CONFIG                = 53,
    GET_MASTER_CONFIG                = 55,
    GET_LOG_METRICS                  = 56,
    VERIFY_MEMBERSHIP                = 57,
    GET_RUNTIME_OPTION               = 58,
    GET_LEASE_INFO                   = 59,
    RENEW_LEASE                      = 60,
    SERVER_CONTROL                   = 61,
    SERVER_CONTROL_ALL               = 62,
    GET_SERVER_ID                    = 63,
    READ_KEYS_AND_VALUE              = 64,
    LOOKUP_INDEX_KEYS                = 65,
    READ_HASHES                      = 66,
    INSERT_INDEX_ENTRY               = 67,
    REMOVE_INDEX_ENTRY               = 68,
    CREATE_INDEX                     = 69,
    DROP_INDEX                       = 70,
    DROP_INDEXLET_OWNERSHIP          = 71,
    TAKE_INDEXLET_OWNERSHIP          = 72,
    PREP_FOR_INDEXLET_MIGRATION      = 73,
    SPLIT_AND_MIGRATE_INDEXLET       = 74,
    COORD_SPLIT_AND_MIGRATE_INDEXLET = 75,
    TX_DECISION                      = 76,
    TX_PREPARE                       = 77,
    TX_REQUEST_ABORT                 = 78,
    TX_HINT_FAILED                   = 79,
    ECHO                             = 80,
    ILLEGAL_RPC_TYPE                 = 81,  // 1 + the highest legitimate Opcode
};
/**
 * Header found at the very beginning of every RPC request.
 * Packed: two 16-bit fields, 4 bytes in total.
 */
struct RequestCommon {
    uint16_t opcode;   /// Opcode of operation to be performed.
    uint16_t service;  /// ServiceType to invoke for this rpc.
} __attribute__((packed));
/**
 * Header found at the very beginning of every RPC response.
 */
struct ResponseCommon {
    // Indicates whether the operation succeeded; if not, it explains why.
    Status status;
} __attribute__((packed));
/**
 * Wire format of the WRITE rpc (opcode WRITE, handled by MASTER_SERVICE).
 *
 * Two things distinguish these messages from a plain write header:
 *  - a `timestamp` field, added to help measure latencies;
 *  - trailing padding arrays that bring the packet to a size selected at
 *    compile time with a command-line macro:
 *      (no macro)  Request is   100 bytes (the default)
 *      -DP1000     Request is  1000 bytes
 *      -DP10000    Request is 10000 bytes
 *    Response has the same sizes provided sizeof(Status) is 4
 *    (NOTE(review): the enum's size is compiler-defined - confirm).
 *
 * The padding is declared once per size tier, so defining both P1000 and
 * P10000 no longer redeclares array2/array3; the larger size wins. The
 * layout for each single macro is unchanged.
 */
struct Write {
    static const Opcode opcode = WRITE;
    static const ServiceType service = MASTER_SERVICE;

    struct Request {
        RequestCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t timestamp;       // Latency-measurement timestamp.
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        uint32_t length;          // Includes the total size of the
                                  // keysAndValue blob in bytes. These
                                  // follow immediately after this header.
        RejectRules rejectRules;
        uint8_t async;
        uint8_t array[15];        // Pads the default request to 100 bytes.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];     // +896 bytes
        uint8_t array3[4];        // +4 bytes   -> 1000 bytes
#endif
#ifdef P10000
        uint64_t array4[1125];    // +9000 bytes -> 10000 bytes
#endif
    } __attribute__((packed));

    struct Response {
        ResponseCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t version;
        uint64_t timestamp;       // Latency-measurement timestamp.
        uint64_t array[8];        // Padding for the default response size.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];
#endif
    } __attribute__((packed));
};
/**
 * Wire format of the BACKUP_WRITE rpc (opcode BACKUP_WRITE, handled by
 * BACKUP_SERVICE). Request and Response have the same fields, in the same
 * order, as Write::Request and Write::Response, including the `timestamp`
 * field and the compile-time padding:
 *      (no macro)  Request is   100 bytes
 *      -DP1000     Request is  1000 bytes
 *      -DP10000    Request is 10000 bytes
 *
 * The padding is declared once per size tier, so defining both P1000 and
 * P10000 no longer redeclares array2/array3; the larger size wins. The
 * layout for each single macro is unchanged.
 */
struct Backup_Write {
    static const Opcode opcode = BACKUP_WRITE;
    static const ServiceType service = BACKUP_SERVICE;

    struct Request {
        RequestCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t timestamp;       // Latency-measurement timestamp.
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        uint32_t length;          // Includes the total size of the
                                  // keysAndValue blob in bytes. These
                                  // follow immediately after this header.
        RejectRules rejectRules;
        uint8_t async;
        uint8_t array[15];        // Pads the default request to 100 bytes.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];     // +896 bytes
        uint8_t array3[4];        // +4 bytes   -> 1000 bytes
#endif
#ifdef P10000
        uint64_t array4[1125];    // +9000 bytes -> 10000 bytes
#endif
    } __attribute__((packed));

    struct Response {
        ResponseCommon common;
        uint64_t tableId;
        uint64_t key;
        uint64_t version;
        uint64_t timestamp;       // Latency-measurement timestamp.
        uint64_t array[8];        // Padding for the default response size.
#if defined(P1000) || defined(P10000)
        uint64_t array2[112];
        uint8_t array3[4];
#endif
#ifdef P10000
        uint64_t array4[1125];
#endif
    } __attribute__((packed));
};
// Argument block handed to a connection thread; server_thread() copies the
// socket out of it and then frees the block.
typedef struct {
    int sockfd;  // Accepted client socket the thread must serve.
} thread_config_t;
void
*
master_thread_fn
(
void
*
arg
)
{
// This cast will work for Linux, but in general casting pthread_id to an
// integral type isn't portable.
unsigned
long
id
=
(
unsigned
long
)
pthread_self
();
D
(
printf
(
"Master thread %lu created
\n
"
,
id
));
static
char
buffer
[
2
*
M
]
=
{
0
};
send
(
mastersockfd
,
arg
,
sizeof
(
struct
Write
::
Request
),
0
);
// bool* retval=NULL;
int
valread
=
0
;
if
((
valread
=
read
(
mastersockfd
,
buffer
,
2
*
M
))
==
0
)
{
printf
(
"Some master business2"
);
}
else
{
// The write response received is stored in this struct
struct
Write
::
Response
wr
;
memcpy
(
&
wr
,
buffer
,
sizeof
(
wr
));
//debugging
// *retval=!wr.common.status;
uint64_t
ts
=
wr
.
timestamp
;
D
(
printf
(
"Server received response Timestamp:%"
PRIu64
"
\n
"
,
ts
));
chrono
::
microseconds
ms
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
chrono
::
system_clock
::
now
().
time_since_epoch
());
uint64_t
ts2
=
ms
.
count
();
D
(
printf
(
"Server received response at Timestamp:%"
PRIu64
"
\n
"
,
ts2
));
}
D
(
printf
(
"Thread %lu done
\n
"
,
id
));
// int *retval;
// *retval=1;
// return retval;// use clearer logic
return
0
;
}
void
*
backup_thread_fn
(
void
*
arg
)
{
// This cast will work for Linux, but in general casting pthread_id to an
// integral type isn't portable.
unsigned
long
id
=
(
unsigned
long
)
pthread_self
();
D
(
printf
(
"Backup thread %lu created
\n
"
,
id
));
static
char
buffer
[
2
*
M
]
=
{
0
};
send
(
backupsockfd
,
arg
,
sizeof
(
struct
Write
::
Request
),
0
);
int
valread
=
0
;
// bool* retval=NULL;
if
((
valread
=
read
(
backupsockfd
,
buffer
,
2
*
M
))
==
0
)
{
printf
(
"Some backup business2"
);
}
else
{
// The write response received is stored in this struct
struct
Write
::
Response
wr
;
memcpy
(
&
wr
,
buffer
,
sizeof
(
wr
));
//debugging
// *retval=!wr.common.status;
uint64_t
ts
=
wr
.
timestamp
;
D
(
printf
(
"Server received response Timestamp:%"
PRIu64
"
\n
"
,
ts
));
chrono
::
microseconds
ms
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
chrono
::
system_clock
::
now
().
time_since_epoch
());
uint64_t
ts2
=
ms
.
count
();
D
(
printf
(
"Server received response at Timestamp:%"
PRIu64
"
\n
"
,
ts2
));
}
D
(
printf
(
"Thread %lu done
\n
"
,
id
));
// int *retval;
// *retval=1;
// return retval;// use clearer logic
return
0
;
}
void
serve_connection
(
int
sockfd
)
{
// if (send(sockfd, "*", 1, 0) < 1) {
// perror_die("send");
// }
//////////////////////////////////////////////////////
static
char
buffer
[
2
*
M
]
=
{
0
};
static
char
buffer2
[
2
*
M
]
=
{
0
};
while
(
1
)
{
// The write request received is stored in this struct
int
valread
=
0
;
if
((
valread
=
read
(
sockfd
,
buffer
,
2
*
M
))
==
0
)
{
printf
(
"Some business1"
);
break
;
}
// printf("III\n");
struct
Write
::
Request
w1
;
//copying the contents of the buffer into the struct
memcpy
(
&
w1
,
buffer
,
sizeof
(
w1
));
int
a
=
w1
.
common
.
opcode
;
D
(
printf
(
"Opcode:%d
\n
"
,
a
));
//comparing the opcode to ensure that only write packets are processed by the offload
//and all other packets pass through
if
(
a
==
WRITE
)
{
struct
Write
::
Request
w
;
memcpy
(
&
w
,
buffer
,
sizeof
(
w
));
//debugging
D
(
printf
(
"tableId:%lu
\n
"
,
w
.
tableId
));
D
(
printf
(
"key:%lu
\n
"
,
w
.
key
));
//Check the reject rules to confirm
// whether the write packet was for an atomic operation
struct
Write
::
Request
br
;
// br.common.status=STATUS_OK;
// br.rejectRules.givenVersion=0;
// br.tableId=w.tableId;
// br.key=w.key;
br
.
timestamp
=
w
.
timestamp
;
//storing paramters for debugging purposes
// int stat = br.common.status;
// uint64_t vers = br.rejectRules.givenVersion;
// int table = br.tableId;
// int key = br.key;
uint64_t
ts
=
br
.
timestamp
;
D
(
printf
(
"Master sent response Timestamp:%"
PRIu64
"
\n
"
,
ts
));
chrono
::
microseconds
ms
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
chrono
::
system_clock
::
now
().
time_since_epoch
());
uint64_t
ts2
=
ms
.
count
();
D
(
printf
(
"Master sent response at Timestamp:%"
PRIu64
"
\n
"
,
ts2
));
pthread_t
master_thread
;
pthread_t
backup_thread
;
pthread_create
(
&
master_thread
,
NULL
,
master_thread_fn
,
&
w
);
pthread_create
(
&
backup_thread
,
NULL
,
backup_thread_fn
,
&
w
);
// void *master_res;
// void *backup_res;
pthread_join
(
master_thread
,
NULL
);
// &master_res);
D
(
printf
(
"master joined
\n
"
));
pthread_join
(
backup_thread
,
NULL
);
// &backup_res);
D
(
printf
(
"backup joined
\n
"
));
//send response to client
// int mre = *(bool*)master_res;
// int bre = *(bool*)backup_res;
// printf("%d %d",mre,bre);
// free(master_res);
// free(backup_res);
// if(mre*bre>0)
// {
struct
Write
::
Response
wr
;
wr
.
common
.
status
=
STATUS_OK
;
wr
.
timestamp
=
w
.
timestamp
;
send
(
sockfd
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
// }
// else
// {
// struct Write::Response wr;
// wr.common.status=STATUS_MAX_VALUE;
// wr.timestamp=w.timestamp;
// send(sockfd , &wr , sizeof(struct Write::Response), 0 );
// }
// send(sockfd , &wr , sizeof(struct Write::Response), 0 );
// pthread_join(master_thread[i], NULL);
// D(printf("master joined\n"));
// pthread_join(backup_thread[i], NULL);
// D(printf("backup joined\n"));
// // Detach the thread - when it's done, its resources will be cleaned up.
// // Since the main thread lives forever, it will outlive the serving threads.
// pthread_detach(the_thread);
// send(backupsockfd , &w , sizeof(struct Write::Request), 0 );
// int valread=0;
// if ((valread = read( backupsockfd , buffer2, 2 * M)) == 0)
// {
// printf("Some business2");
// }
// else
// {
// // The write response received is stored in this struct
// struct Write::Response wr;
// memcpy(&wr, buffer2, sizeof(wr));
// //debugging
// uint64_t ts = wr.timestamp;
// D(printf("Server received response Timestamp:%" PRIu64 "\n",ts ));
// chrono::microseconds ms = chrono::duration_cast< chrono::microseconds >(chrono::system_clock::now().time_since_epoch());
// uint64_t ts2 = ms.count();
// D(printf("Server received response at Timestamp:%" PRIu64 "\n", ts2 ));
// //send response to client
// send(sockfd , &wr , sizeof(struct Write::Response), 0 );
// }
}
}
////////////////////////////////
close
(
sockfd
);
}
void
*
server_thread
(
void
*
arg
)
{
thread_config_t
*
config
=
(
thread_config_t
*
)
arg
;
int
sockfd
=
config
->
sockfd
;
free
(
config
);
// This cast will work for Linux, but in general casting pthread_id to an
// integral type isn't portable.
unsigned
long
id
=
(
unsigned
long
)
pthread_self
();
printf
(
"Thread %lu created to handle connection with socket %d
\n
"
,
id
,
sockfd
);
serve_connection
(
sockfd
);
printf
(
"Thread %lu done
\n
"
,
id
);
return
0
;
}
int
main
(
int
argc
,
char
**
argv
)
{
setvbuf
(
stdout
,
NULL
,
_IONBF
,
0
);
int
portnum
=
9090
;
int
backupnum
=
9095
;
int
masternum
=
9099
;
if
(
argc
>=
2
)
{
portnum
=
atoi
(
argv
[
1
]);
}
printf
(
"Serving on port %d
\n
"
,
portnum
);
fflush
(
stdout
);
int
masterfd
=
listen_inet_socket
(
masternum
);
struct
sockaddr_in
master_peer_addr
;
socklen_t
master_peer_addr_len
=
sizeof
(
master_peer_addr
);
// int mastersockfd = // declared global so that all threads have access.
mastersockfd
=
accept
(
masterfd
,
(
struct
sockaddr
*
)
&
master_peer_addr
,
&
master_peer_addr_len
);
if
(
mastersockfd
<
0
)
{
perror_die
(
"ERROR on accept"
);
}
report_backup_connected
(
&
master_peer_addr
,
master_peer_addr_len
);
int
backupfd
=
listen_inet_socket
(
backupnum
);
struct
sockaddr_in
backup_peer_addr
;
socklen_t
backup_peer_addr_len
=
sizeof
(
backup_peer_addr
);
// int backupsockfd = // declared global so that all threads have access.
backupsockfd
=
accept
(
backupfd
,
(
struct
sockaddr
*
)
&
backup_peer_addr
,
&
backup_peer_addr_len
);
if
(
backupsockfd
<
0
)
{
perror_die
(
"ERROR on accept"
);
}
report_backup_connected
(
&
backup_peer_addr
,
backup_peer_addr_len
);
int
sockfd
=
listen_inet_socket
(
portnum
);
while
(
1
)
{
struct
sockaddr_in
peer_addr
;
socklen_t
peer_addr_len
=
sizeof
(
peer_addr
);
int
newsockfd
=
accept
(
sockfd
,
(
struct
sockaddr
*
)
&
peer_addr
,
&
peer_addr_len
);
if
(
newsockfd
<
0
)
{
perror_die
(
"ERROR on accept"
);
}
report_peer_connected
(
&
peer_addr
,
peer_addr_len
);
pthread_t
the_thread
;
thread_config_t
*
config
=
(
thread_config_t
*
)
malloc
(
sizeof
(
*
config
));
if
(
!
config
)
{
die
(
"OOM"
);
}
config
->
sockfd
=
newsockfd
;
pthread_create
(
&
the_thread
,
NULL
,
server_thread
,
config
);
// Detach the thread - when it's done, its resources will be cleaned up.
// Since the main thread lives forever, it will outlive the serving threads.
pthread_detach
(
the_thread
);
}
return
0
;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/final_shashank/offload_master.cpp
0 → 100644
View file @
d2d47b86
#include <unistd.h>
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <thread>
#include <vector>
#include <mutex>
#include <chrono>
#include <random>
#include <unordered_map>
#include <utility>
#include <inttypes.h>
// TCP port used by main() when connecting.
#define PORT 9099

// Time-unit multipliers. Compound macros are parenthesized so that they expand
// to a single value inside larger expressions (e.g. `x / MICRO`).
#define MILLIS 1000
#define MICRO (MILLIS * 1000)

#define MAX_TARGET 4

// Binary size multipliers: K = 2^10, M = 2^20, G = 2^30.
#define K 1024
#define M (K * 1024)
#define G (M * 1024)

// in order to turn on debugging, compile with -DDEBUG flag
#ifdef DEBUG
#define D(x) (x)
#else
#define D(x) do{}while(0)
#endif
using namespace std;

// NOTE(review): not referenced by the code visible in this file.
unordered_map<string, uint64_t> m;

// Emulates the master's DRAM. main() stores and looks up entries under the
// string "<tableId>$<key>"; the mapped value is the object's version number.
unordered_map<string, uint64_t> master_data;

// Descriptor set that main() rebuilds and passes to select() on every
// iteration of its loop.
fd_set readfds;

// Vector meant to store the difference in timestamps for each packet; time
// since epoch in microseconds is typecast to uint64_t.
// NOTE(review): not referenced by the code visible in this file.
vector<uint64_t> latv;
/**
 * Symbolic names for the status values returned to applications by RAMCloud
 * operations.
 *
 * 0 means success; every other value reports an error. Not every operation
 * can return every status.
 *
 * When adding a new status value, also:
 *  - keep STATUS_MAX_VALUE equal to the largest defined status value and keep
 *    its definition last in the list (it is used primarily for testing);
 *  - add entries to the tables "messages" and "symbols" in Status.cc;
 *  - add a new exception class to ClientException.h;
 *  - add a "case" to ClientException::throwException that maps the status
 *    value to the status-specific ClientException subclass;
 *  - in the Java bindings, add a static class for the exception to
 *    ClientException.java and a case there that throws it;
 *  - add the exception to the Status enum in Status.java at the position
 *    corresponding to its status code.
 */
typedef enum Status {
    /// Default return value when an operation was successful.
    STATUS_OK                       = 0,

    /// The server does not know about (and is not responsible for) the given
    /// tablet, but it may exist elsewhere in the system. Preferred over the
    /// definitive TABLE_DOESNT_EXIST whenever the tablet could live on
    /// another server.
    STATUS_UNKNOWN_TABLET           = 1,

    /// The table does not exist anywhere in the system. At present only the
    /// coordinator can say so with certainty.
    STATUS_TABLE_DOESNT_EXIST       = 2,

    /// The object does not exist anywhere in the system. There is no
    /// UNKNOWN_OBJECT counterpart: servers reject operations on objects in
    /// unknown tables with a table-related status, and a server that owns a
    /// tablet can say with certainty whether an object exists in it.
    STATUS_OBJECT_DOESNT_EXIST      = 3,

    STATUS_OBJECT_EXISTS            = 4,
    STATUS_WRONG_VERSION            = 5,
    STATUS_NO_TABLE_SPACE           = 6,
    STATUS_MESSAGE_TOO_SHORT        = 7,
    STATUS_UNIMPLEMENTED_REQUEST    = 8,
    STATUS_REQUEST_FORMAT_ERROR     = 9,
    STATUS_RESPONSE_FORMAT_ERROR    = 10,
    STATUS_COULDNT_CONNECT          = 11,
    STATUS_BACKUP_BAD_SEGMENT_ID    = 12,

    /// Returned by backups when they cannot (or do not wish to) allocate
    /// space for a segment replica.
    STATUS_BACKUP_OPEN_REJECTED     = 13,

    STATUS_BACKUP_SEGMENT_OVERFLOW  = 14,
    STATUS_BACKUP_MALFORMED_SEGMENT = 15,
    STATUS_SEGMENT_RECOVERY_FAILED  = 16,

    /// The server is not prepared to handle the request right now; the
    /// caller should retry later. Possible causes include (a) the server is
    /// out of resources to execute the request, or (b) the server is not
    /// sure it has authority to execute the request and is checking with the
    /// coordinator.
    STATUS_RETRY                    = 17,

    /// The RPC requested an unknown service.
    STATUS_SERVICE_NOT_AVAILABLE    = 18,

    STATUS_TIMEOUT                  = 19,

    /// The server an RPC is directed to either never existed, has come and
    /// gone, or is currently crashed. It cannot respond to RPCs and probably
    /// never will again (unless the id has not existed yet; once a server
    /// crashes its id is never reused).
    STATUS_SERVER_NOT_UP            = 20,

    STATUS_INTERNAL_ERROR           = 21,

    /// The object chosen for an operation does not match the associated
    /// requirements and is therefore invalid.
    STATUS_INVALID_OBJECT           = 22,

    /// A tablet does not exist. Relevant when split or merge operations on
    /// tablets are executed.
    STATUS_TABLET_DOESNT_EXIST      = 23,

    /// The logic to partition tablets was invoked without a preceding
    /// invocation to start reading replicas off of disk.
    STATUS_PARTITION_BEFORE_READ    = 24,

    /// An RPC was intended for a particular server id but was actually sent
    /// to a different server id.
    STATUS_WRONG_SERVER             = 25,

    /// The server sending an RPC is not present in the server list of the
    /// recipient. Helps servers discover that they are zombies (the rest of
    /// the cluster thinks they are dead while they believe they are alive),
    /// so that they stop servicing requests once other servers have taken
    /// over their tablets. See "Zombies" in designNotes.
    STATUS_CALLER_NOT_IN_CLUSTER    = 26,

    /// A single request was too big to fit in an rpc and could not be
    /// sent/carried out.
    STATUS_REQUEST_TOO_LARGE        = 27,

    /// The server does not know about (and is not responsible for) the given
    /// indexlet, but it may exist elsewhere in the system. Preferred over
    /// the definitive INDEX_DOESNT_EXIST whenever the indexlet could live on
    /// another server.
    STATUS_UNKNOWN_INDEXLET         = 28,

    /// The index does not exist anywhere in the system. At present only the
    /// coordinator can say so with certainty.
    STATUS_INDEX_DOESNT_EXIST       = 29,

    /// A parameter provided by the client is invalid (for example, outside
    /// the allowed bounds).
    STATUS_INVALID_PARAMETER        = 30,

    /// The client already received the result of this rpc, so executing it
    /// again makes no sense. Most likely cause: a delayed network packet.
    STATUS_STALE_RPC                = 31,

    /// The client's lease has expired on the coordinator; the master refused
    /// to execute the RPC with an expired lease.
    STATUS_EXPIRED_LEASE            = 32,

    /// A client tried to perform transaction operations after the
    /// transaction commit had already started.
    STATUS_TX_OP_AFTER_COMMIT       = 33,

    /// Largest defined status value; must stay last.
    STATUS_MAX_VALUE                = 33,
} Status;
/**
 * Conditions under which a conditional operation is aborted with an error.
 *
 * RejectRules are typically used to keep updates consistent, e.g. "update
 * this value, but only if it has not changed since it was last read". When a
 * RejectRules object accompanies an operation, the operation is aborted if
 * any of the following holds:
 *  - doesntExist is nonzero and the object does not exist;
 *  - exists is nonzero and the object does exist;
 *  - versionLeGiven is nonzero and the object exists with a version less
 *    than or equal to givenVersion;
 *  - versionNeGiven is nonzero and the object exists with a version
 *    different from givenVersion.
 *
 * The struct is packed.
 */
struct RejectRules {
    uint64_t givenVersion;    // reference version for the two version rules
    uint8_t  doesntExist;     // nonzero: reject when the object is missing
    uint8_t  exists;          // nonzero: reject when the object is present
    uint8_t  versionLeGiven;  // nonzero: reject when version <= givenVersion
    uint8_t  versionNeGiven;  // nonzero: reject when version != givenVersion
} __attribute__((packed));
/**
 * Selects the particular service that will handle a given rpc.
 * An rpc may only be sent to one particular service; see ServiceMask for
 * situations dealing with sets of services on a particular Server.
 */
enum ServiceType {
    MASTER_SERVICE      = 0,
    BACKUP_SERVICE      = 1,
    COORDINATOR_SERVICE = 2,
    ADMIN_SERVICE       = 3,
    INVALID_SERVICE     = 4,  // One higher than the max.
};
/**
 * Used in linearizable RPCs to check whether or not the RPC can be processed.
 * The struct is packed.
 */
struct ClientLease {
    /// A cluster unique id for a specific lease.
    /// 0 is used to indicate an invalid or expired id.
    uint64_t leaseId;

    /// Cluster time after which the lease may have become invalid.
    uint64_t leaseExpiration;

    /// Cluster time when this lease information was provided by the
    /// coordinator.
    uint64_t timestamp;
} __attribute__((packed));
/**
 * Choices for the "opcode" field in RPC headers, which selects the operation
 * to perform. Each RAMCloud service implements a subset of these operations.
 * The numbering has gaps (e.g. 27, 30, 34, 37-38, 41 and 54 are unassigned).
 *
 * If you change this table you must also reflect the changes in:
 *  - the method opcodeSymbol in WireFormat.cc;
 *  - WireFormatTest.cc's out-of-range test, if ILLEGAL_RPC_TYPE was changed;
 *  - possibly the "callees" table in scripts/genLevels.py, which keeps track
 *    of which RPCs invoke which other RPCs.
 */
enum Opcode {
    PING                             = 7,
    PROXY_PING                       = 8,
    KILL                             = 9,
    CREATE_TABLE                     = 10,
    GET_TABLE_ID                     = 11,
    DROP_TABLE                       = 12,
    READ                             = 13,
    WRITE                            = 14,
    REMOVE                           = 15,
    ENLIST_SERVER                    = 16,
    GET_SERVER_LIST                  = 17,
    GET_TABLE_CONFIG                 = 18,
    RECOVER                          = 19,
    HINT_SERVER_CRASHED              = 20,
    RECOVERY_MASTER_FINISHED         = 21,
    ENUMERATE                        = 22,
    SET_MASTER_RECOVERY_INFO         = 23,
    FILL_WITH_TEST_DATA              = 24,
    MULTI_OP                         = 25,
    GET_METRICS                      = 26,

    BACKUP_FREE                      = 28,
    BACKUP_GETRECOVERYDATA           = 29,

    BACKUP_STARTREADINGDATA          = 31,
    BACKUP_WRITE                     = 32,
    BACKUP_RECOVERYCOMPLETE          = 33,

    UPDATE_SERVER_LIST               = 35,
    BACKUP_STARTPARTITION            = 36,

    DROP_TABLET_OWNERSHIP            = 39,
    TAKE_TABLET_OWNERSHIP            = 40,

    GET_HEAD_OF_LOG                  = 42,
    INCREMENT                        = 43,
    PREP_FOR_MIGRATION               = 44,
    RECEIVE_MIGRATION_DATA           = 45,
    REASSIGN_TABLET_OWNERSHIP        = 46,
    MIGRATE_TABLET                   = 47,
    IS_REPLICA_NEEDED                = 48,
    SPLIT_TABLET                     = 49,
    GET_SERVER_STATISTICS            = 50,
    SET_RUNTIME_OPTION               = 51,
    GET_SERVER_CONFIG                = 52,
    GET_BACKUP_CONFIG                = 53,

    GET_MASTER_CONFIG                = 55,
    GET_LOG_METRICS                  = 56,
    VERIFY_MEMBERSHIP                = 57,
    GET_RUNTIME_OPTION               = 58,
    GET_LEASE_INFO                   = 59,
    RENEW_LEASE                      = 60,
    SERVER_CONTROL                   = 61,
    SERVER_CONTROL_ALL               = 62,
    GET_SERVER_ID                    = 63,
    READ_KEYS_AND_VALUE              = 64,
    LOOKUP_INDEX_KEYS                = 65,
    READ_HASHES                      = 66,
    INSERT_INDEX_ENTRY               = 67,
    REMOVE_INDEX_ENTRY               = 68,
    CREATE_INDEX                     = 69,
    DROP_INDEX                       = 70,
    DROP_INDEXLET_OWNERSHIP          = 71,
    TAKE_INDEXLET_OWNERSHIP          = 72,
    PREP_FOR_INDEXLET_MIGRATION      = 73,
    SPLIT_AND_MIGRATE_INDEXLET       = 74,
    COORD_SPLIT_AND_MIGRATE_INDEXLET = 75,
    TX_DECISION                      = 76,
    TX_PREPARE                       = 77,
    TX_REQUEST_ABORT                 = 78,
    TX_HINT_FAILED                   = 79,
    ECHO                             = 80,
    ILLEGAL_RPC_TYPE                 = 81,  // 1 + the highest legitimate Opcode
};
/**
 * Header that starts every RPC request. The struct is packed.
 */
struct RequestCommon {
    uint16_t opcode;   /// Opcode of operation to be performed.
    uint16_t service;  /// ServiceType to invoke for this rpc.
} __attribute__((packed));
/**
 * Header that starts every RPC response. The struct is packed.
 */
struct ResponseCommon {
    /// Indicates whether the operation succeeded; if not, it explains why.
    Status status;
} __attribute__((packed));
/**
 * Packet layout of a write RPC.
 *
 * Differences from a plain write packet: a uint64_t timestamp field is added
 * to help measure latencies, and trailing filler arrays selected at compile
 * time let the user choose the packet size with command-line macros:
 *  - default:   Request is 100 bytes;
 *  - -DP1000:   adds array2/array3 (900 bytes), giving 1000 bytes;
 *  - -DP10000:  adds array2/array3/array4 (9900 bytes), giving 10000 bytes.
 * Response carries the same filler and has the same sizes provided the
 * Status enum occupies 4 bytes (TODO confirm for the target ABI).
 * P1000 and P10000 must not both be defined: the filler members would then
 * be declared twice.
 */
struct Write {
    static const Opcode opcode = WRITE;
    static const ServiceType service = MASTER_SERVICE;

    struct Request {
        RequestCommon common;
        uint64_t      tableId;
        uint64_t      key;
        uint64_t      timestamp;
        ClientLease   lease;
        uint64_t      rpcId;
        uint64_t      ackId;
        // Includes the total size of the keysAndValue blob in bytes. These
        // follow immediately after this header.
        uint32_t      length;
        RejectRules   rejectRules;
        uint8_t       async;
        uint8_t       array[15];
#if defined(P1000)
        uint64_t      array2[112];
        uint8_t       array3[4];
#endif
#if defined(P10000)
        uint64_t      array2[112];
        uint8_t       array3[4];
        uint64_t      array4[1125];
#endif
    } __attribute__((packed));

    struct Response {
        ResponseCommon common;
        uint64_t       tableId;
        uint64_t       key;
        uint64_t       version;
        uint64_t       timestamp;
        uint64_t       array[8];
#if defined(P1000)
        uint64_t       array2[112];
        uint8_t        array3[4];
#endif
#if defined(P10000)
        uint64_t       array2[112];
        uint8_t        array3[4];
        uint64_t       array4[1125];
#endif
    } __attribute__((packed));
};
/**
 * Packet layout of a backup-write RPC (opcode BACKUP_WRITE, handled by
 * BACKUP_SERVICE). Request and Response declare the same fields, in the same
 * order, as Write::Request and Write::Response, including the filler arrays
 * selected with -DP1000 / -DP10000. P1000 and P10000 must not both be
 * defined: the filler members would then be declared twice.
 */
struct Backup_Write {
    static const Opcode opcode = BACKUP_WRITE;
    static const ServiceType service = BACKUP_SERVICE;

    struct Request {
        RequestCommon common;
        uint64_t      tableId;
        uint64_t      key;
        uint64_t      timestamp;
        ClientLease   lease;
        uint64_t      rpcId;
        uint64_t      ackId;
        // Includes the total size of the keysAndValue blob in bytes. These
        // follow immediately after this header.
        uint32_t      length;
        RejectRules   rejectRules;
        uint8_t       async;
        uint8_t       array[15];
#if defined(P1000)
        uint64_t      array2[112];
        uint8_t       array3[4];
#endif
#if defined(P10000)
        uint64_t      array2[112];
        uint8_t       array3[4];
        uint64_t      array4[1125];
#endif
    } __attribute__((packed));

    struct Response {
        ResponseCommon common;
        uint64_t       tableId;
        uint64_t       key;
        uint64_t       version;
        uint64_t       timestamp;
        uint64_t       array[8];
#if defined(P1000)
        uint64_t       array2[112];
        uint8_t        array3[4];
#endif
#if defined(P10000)
        uint64_t       array2[112];
        uint8_t        array3[4];
        uint64_t       array4[1125];
#endif
    } __attribute__((packed));
};
int
main
(
int
argc
,
char
const
*
argv
[])
{
int
sock
=
0
,
valread
;
struct
sockaddr_in
serv_addr
;
char
*
hello
=
"Master preprocessing done"
;
char
buffer1
[
1024
]
=
{
0
};
struct
sockaddr_in
address
;
int
opt
=
1
;
int
addrlen
=
sizeof
(
address
);
if
((
sock
=
socket
(
AF_INET
,
SOCK_STREAM
,
0
))
<
0
)
{
printf
(
"
\n
Socket creation error
\n
"
);
return
-
1
;
}
serv_addr
.
sin_family
=
AF_INET
;
serv_addr
.
sin_port
=
htons
(
PORT
);
// Convert IPv4 and IPv6 addresses from text to binary form
if
(
inet_pton
(
AF_INET
,
"192.168.200.21"
,
&
serv_addr
.
sin_addr
)
<=
0
)
//10.129.2.181
{
printf
(
"
\n
Invalid address/ Address not supported
\n
"
);
return
-
1
;
}
if
(
connect
(
sock
,
(
struct
sockaddr
*
)
&
serv_addr
,
sizeof
(
serv_addr
))
<
0
)
{
printf
(
"
\n
Connection Failed
\n
"
);
return
-
1
;
}
int
sd
,
max_sd
;
int
csd
,
msd
;
int
max_clients
=
1
;
int
activity
;
int
client_socket
=
0
;
// valread = read( sock , buffer1, 1024);
// if(valread>0)
// {
// printf("%s\n",buffer1 );
// memset(&buffer1[0], 0, sizeof(buffer1));
// }
//setting up master's DRAM before responding to the synchronisation packet
for
(
int
i
=
0
;
i
<
2000000
;
i
++
)
{
master_data
.
insert
({
"1$"
+
to_string
(
i
),
1
});
}
// if( send(sock, hello, strlen(hello), 0) != strlen(hello) )
// {
// perror("send");
// }
// valread = read( sock , buffer1, 1024);
// if(valread>0)
// {
// printf("%s\n",buffer1 );
// memset(&buffer1[0], 0, sizeof(buffer1));
// }
while
(
1
)
{
//clear the socket set
FD_ZERO
(
&
readfds
);
//add master socket to set
FD_SET
(
sock
,
&
readfds
);
max_sd
=
sock
;
//socket descriptor
sd
=
client_socket
;
//if valid socket descriptor then add to read list
if
(
sd
>
0
)
FD_SET
(
sd
,
&
readfds
);
//highest file descriptor number, need it for the select function
if
(
sd
>
max_sd
)
max_sd
=
sd
;
//wait for an activity on one of the sockets , timeout is NULL ,
//so wait indefinitely
activity
=
select
(
max_sd
+
1
,
&
readfds
,
NULL
,
NULL
,
NULL
);
if
((
activity
<
0
)
&&
(
errno
!=
EINTR
))
{
printf
(
"select error"
);
}
//If something happened on the master socket ,
if
(
FD_ISSET
(
sock
,
&
readfds
))
{
int
n
=
0
;
static
char
buffer
[
2
*
M
]
=
{
0
};
n
=
read
(
sock
,
buffer
,
2
*
M
);
if
(
n
>
0
)
{
// The write request received is stored in this struct
struct
Write
::
Request
w1
;
memcpy
(
&
w1
,
buffer
,
sizeof
(
w1
));
int
a
=
w1
.
common
.
opcode
;
D
(
printf
(
"Opcode:%d
\n
"
,
a
));
//comparing the opcode to ensure that only write packets are processed by the offload
//and all other packets pass through
if
(
a
==
WRITE
)
{
struct
Write
::
Request
w
;
memcpy
(
&
w
,
buffer
,
sizeof
(
w
));
//debugging
D
(
printf
(
"tableId:%lu
\n
"
,
w
.
tableId
));
D
(
printf
(
"key:%lu
\n
"
,
w
.
key
));
string
s
=
""
;
//Create the key for the hashmap by concatenating
//the tableId and the key in the write packet
s
=
s
+
to_string
(
w
.
tableId
)
+
"$"
+
to_string
(
w
.
key
);
D
(
printf
(
"HashKey:%s
\n
"
,
s
.
c_str
()));
//Master checks the reject rules to respond with failure
//if operation is atomic and there is a version number mismatch
if
(
w
.
rejectRules
.
versionNeGiven
)
{
string
s
=
""
;
//Create the key for the hashmap by concatenating
//the tableId and the key in the write packet
s
=
s
+
to_string
(
w
.
tableId
)
+
"$"
+
to_string
(
w
.
key
);
D
(
cout
<<
s
<<
"
\n
"
);
if
(
master_data
.
find
(
s
)
!=
master_data
.
end
())
{
D
(
std
::
cout
<<
"Key found
\n
"
);
uint64_t
curr_version_number
=
master_data
[
s
];
//compare curr_version_number with version number in w
if
(
w
.
rejectRules
.
givenVersion
!=
curr_version_number
)
{
D
(
std
::
cout
<<
"version number doesn't match
\n
"
);
//raise failure response
struct
Write
::
Response
wr
;
wr
.
common
.
status
=
STATUS_WRONG_VERSION
;
wr
.
tableId
=
w
.
tableId
;
wr
.
key
=
w
.
key
;
wr
.
version
=
curr_version_number
;
wr
.
timestamp
=
w
.
timestamp
;
send
(
sock
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
D
(
printf
(
"%s
\n
"
,
" master raised failure response"
));
}
else
{
D
(
std
::
cout
<<
"version number matches
\n
"
);
//update version number in master
master_data
[
s
]
=
master_data
[
s
]
+
(
uint64_t
)
1
;
D
(
printf
(
"Updated master_data Key:%s Version:%lu
\n
"
,
s
.
c_str
(),
master_data
[
s
]));
//raise success response
struct
Write
::
Response
wr
;
wr
.
common
.
status
=
STATUS_OK
;
wr
.
tableId
=
w
.
tableId
;
wr
.
key
=
w
.
key
;
wr
.
version
=
master_data
[
s
];
wr
.
timestamp
=
w
.
timestamp
;
//storing paramters for debugging purposes
int
stat
=
wr
.
common
.
status
;
int
table
=
wr
.
tableId
;
int
key
=
wr
.
key
;
int
vers
=
wr
.
version
;
uint64_t
ts
=
wr
.
timestamp
;
D
(
printf
(
"Master sent response Table:%d Key:%d Version:%d Status:%d Timestamp:%"
PRIu64
"
\n
"
,
table
,
key
,
vers
,
stat
,
ts
));
chrono
::
microseconds
ms
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
chrono
::
system_clock
::
now
().
time_since_epoch
());
uint64_t
ts2
=
ms
.
count
();
D
(
printf
(
"Master sent response at Timestamp:%"
PRIu64
"
\n
"
,
ts2
));
// printf("Master sent hashmap Key:%s Version:%lu\n",s.c_str(), vers);
send
(
sock
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
D
(
printf
(
"%s
\n
"
,
"master raised success response"
));
}
}
else
{
D
(
std
::
cout
<<
"Key not found in master data
\n
"
);
struct
Write
::
Response
wr
;
//raise failure response since object does not exist in master's DRAM
wr
.
common
.
status
=
STATUS_OBJECT_DOESNT_EXIST
;
wr
.
version
=
1
;
wr
.
tableId
=
w
.
tableId
;
wr
.
key
=
w
.
key
;
wr
.
timestamp
=
w
.
timestamp
;
send
(
sock
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
D
(
printf
(
"%s
\n
"
,
"object doesn't exist in master, sent auto failure"
));
}
}
else
{
//raise success response
struct
Write
::
Response
wr
;
wr
.
common
.
status
=
STATUS_OK
;
wr
.
version
=
1
;
wr
.
tableId
=
w
.
tableId
;
wr
.
key
=
w
.
key
;
wr
.
timestamp
=
w
.
timestamp
;
send
(
sock
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
D
(
printf
(
"%s
\n
"
,
"non transaction packet, sent auto success
\n
"
));
}
}
//termination packet for debugging
else
if
(
a
==
ILLEGAL_RPC_TYPE
)
{
struct
Write
::
Response
wr
;
wr
.
common
.
status
=
STATUS_MAX_VALUE
;
wr
.
version
=
1
;
wr
.
tableId
=
w1
.
tableId
;
wr
.
key
=
w1
.
key
;
wr
.
timestamp
=
w1
.
timestamp
;
send
(
sock
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
chrono
::
microseconds
ms
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
chrono
::
system_clock
::
now
().
time_since_epoch
());
uint64_t
ts2
=
ms
.
count
();
D
(
printf
(
"Master sent response at Timestamp:%"
PRIu64
"
\n
"
,
ts2
));
D
(
printf
(
"%s
\n
"
,
"testing
\n
"
));
break
;
}
// else error
else
{
struct
Write
::
Response
wr
;
wr
.
common
.
status
=
STATUS_MAX_VALUE
;
wr
.
version
=
1
;
wr
.
tableId
=
w1
.
tableId
;
wr
.
key
=
w1
.
key
;
wr
.
timestamp
=
w1
.
timestamp
;
send
(
sock
,
&
wr
,
sizeof
(
struct
Write
::
Response
),
0
);
chrono
::
microseconds
ms
=
chrono
::
duration_cast
<
chrono
::
microseconds
>
(
chrono
::
system_clock
::
now
().
time_since_epoch
());
uint64_t
ts2
=
ms
.
count
();
D
(
printf
(
"Master sent response at Timestamp:%"
PRIu64
"
\n
"
,
ts2
));
D
(
printf
(
"%s
\n
"
,
"testing
\n
"
));
break
;
}
}
}
}
return
0
;
}
Smit_MTP_RamCloud_Replication_Offload/final_shashank/utils.cpp
0 → 100644
View file @
d2d47b86
// Utility functions for socket servers in C.
//
// Eli Bendersky [http://eli.thegreenplace.net]
// This code is in the public domain.
#include "utils.h"
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
// #define _GNU_SOURCE
#include <netdb.h>
#define N_BACKLOG 64
// Writes the printf-style message described by fmt and the arguments that
// follow it to stderr, appends a newline, and terminates the process with
// EXIT_FAILURE. Does not return.
void die(char* fmt, ...) {
  va_list ap;

  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);

  fputc('\n', stderr);
  exit(EXIT_FAILURE);
}
// malloc() wrapper that never hands a failed allocation back to the caller:
// when malloc() returns a null pointer the process is terminated through
// die("malloc failed").
void* xmalloc(size_t size) {
  void* block = malloc(size);
  if (block) {
    return block;
  }
  die("malloc failed");
  return block;  // not reached: die() exits the process
}
// Reports the current errno through perror(), prefixed with msg, and then
// terminates the process with EXIT_FAILURE. Does not return.
void perror_die(char* msg) {
  perror(msg);
  exit(EXIT_FAILURE);
}
// Prints a one-line "peer connected" notice to stdout.
//
// sa/salen describe the peer's address as filled in by accept(). The address
// is translated to host and service strings with getnameinfo(); when that
// translation fails a generic notice is printed instead.
void report_peer_connected(const struct sockaddr_in* sa, socklen_t salen) {
  char host[NI_MAXHOST];
  char service[NI_MAXSERV];

  const int lookup = getnameinfo((struct sockaddr*)sa, salen, host, NI_MAXHOST,
                                 service, NI_MAXSERV, 0);
  if (lookup != 0) {
    printf("peer (unknonwn) connected\n");
    return;
  }
  printf("peer (%s, %s) connected\n", host, service);
}
// Prints a one-line "backup connected" notice to stdout.
//
// sa/salen describe the peer's address as filled in by accept(). The address
// is translated to host and service strings with getnameinfo(); when that
// translation fails a generic notice is printed instead.
void report_backup_connected(const struct sockaddr_in* sa, socklen_t salen) {
  char host[NI_MAXHOST];
  char service[NI_MAXSERV];

  const int lookup = getnameinfo((struct sockaddr*)sa, salen, host, NI_MAXHOST,
                                 service, NI_MAXSERV, 0);
  if (lookup != 0) {
    printf("backup (unknonwn) connected\n");
    return;
  }
  printf("backup (%s, %s) connected\n", host, service);
}
int
listen_inet_socket
(
int
portnum
)
{
int
sockfd
=
socket
(
AF_INET
,
SOCK_STREAM
,
0
);
if
(
sockfd
<
0
)
{
perror_die
(
"ERROR opening socket"
);
}
// This helps avoid spurious EADDRINUSE when the previous instance of this
// server died.
int
opt
=
1
;
if
(
setsockopt
(
sockfd
,
SOL_SOCKET
,
SO_REUSEADDR
,
&
opt
,
sizeof
(
opt
))
<
0
)
{
perror_die
(
"setsockopt"
);
}
struct
sockaddr_in
serv_addr
;
memset
(
&
serv_addr
,
0
,
sizeof
(
serv_addr
));
serv_addr
.
sin_family
=
AF_INET
;
serv_addr
.
sin_addr
.
s_addr
=
INADDR_ANY
;
serv_addr
.
sin_port
=
htons
(
portnum
);
if
(
bind
(
sockfd
,
(
struct
sockaddr
*
)
&
serv_addr
,
sizeof
(
serv_addr
))
<
0
)
{
perror_die
(
"ERROR on binding"
);
}
if
(
listen
(
sockfd
,
N_BACKLOG
)
<
0
)
{
perror_die
(
"ERROR on listen"
);
}
return
sockfd
;
}
// Switches sockfd to non-blocking mode by adding O_NONBLOCK to its current
// file status flags. A failing fcntl() call terminates the process through
// perror_die().
void make_socket_non_blocking(int sockfd) {
  const int current = fcntl(sockfd, F_GETFL, 0);
  if (current == -1) {
    perror_die("fcntl F_GETFL");
  }

  const int wanted = current | O_NONBLOCK;
  if (fcntl(sockfd, F_SETFL, wanted) == -1) {
    perror_die("fcntl F_SETFL O_NONBLOCK");
  }
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/final_shashank/utils.h
0 → 100644
View file @
d2d47b86
// Utility functions for socket servers in C.
#ifndef UTILS_H
#define UTILS_H

#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>

// Prints the given printf-like message, followed by a newline, to stderr and
// then dies (exits with a failure status).
void die(char* fmt, ...);

// malloc with error checking: dies if the allocation fails.
void* xmalloc(size_t size);

// Prints the current perror status prefixed with msg and then dies (exits
// with a failure status).
void perror_die(char* msg);

// Reports a peer connection to stdout. sa is the data populated by a
// successful accept() call.
void report_peer_connected(const struct sockaddr_in* sa, socklen_t salen);

// Reports a backup connection to stdout. sa is the data populated by a
// successful accept() call.
void report_backup_connected(const struct sockaddr_in* sa, socklen_t salen);

// Creates a bound and listening INET socket on the given port number.
// Returns the socket fd when successful; dies in case of errors.
int listen_inet_socket(int portnum);

// Sets the given socket into non-blocking mode.
void make_socket_non_blocking(int sockfd);

#endif /* UTILS_H */
Smit_MTP_RamCloud_Replication_Offload/include/Buffer.cc
0 → 100644
View file @
d2d47b86
#ifndef __BUFFER_CC__
#define __BUFFER_CC__
#include <algorithm>
#include <thread>
#include <vector>
#include "Buffer.hpp"
#include "common.hpp"
#include "queue_context.hpp"
// Creates a buffer with buf_size slots, all of them free.
//
// Every field of every slot is given a defined value. In particular job is
// set to NULL: erase_element_by_pos() hands that pointer to free(), which is
// only safe on a never-filled slot if the pointer is null.
// Terminates the process with exit(-1) when the mutex cannot be initialized.
Buffer::Buffer(size_t buf_size) {
    this->buffer_size = buf_size;
    this->buffer.resize(buf_size);

    for (size_t i = 0; i < buf_size; i++) {
        Buffer_Element &slot = this->buffer[i];
        slot.fd_id = -1;      // -1 marks the slot as free
        slot.in_use = false;
        slot.cr = NULL;
        slot.job = NULL;
    }

    if (pthread_mutex_init(&(this->buffer_lock), NULL)) {
        perror("Unable to init buffer lock");
        exit(-1);
    }
}
// Releases what the occupied slots still point to and destroys the mutex.
//
// A slot counts as occupied when its fd_id is not -1; for each such slot the
// cr and job pointers are passed to free() and then cleared.
Buffer::~Buffer() {
    for (size_t idx = 0; idx < this->buffer_size; idx++) {
        Buffer_Element &slot = this->buffer[idx];
        if (slot.fd_id != -1) {
            slot.fd_id = -1;

            free(slot.cr);
            slot.cr = NULL;

            free(slot.job);
            slot.job = NULL;
        }
    }
    pthread_mutex_destroy(&(this->buffer_lock));
}
int
Buffer
::
add_element
(
int
fd
,
job_context
*
job
,
Common_Request
*
cr
)
{
bool
found
=
false
;
for
(
int
i
=
0
;
i
<
this
->
buffer_size
;
i
++
)
{
if
(
this
->
buffer
[
i
].
fd_id
>=
0
)
continue
;
else
{
if
(
job
->
transport_type
==
TCP_IP_TRANSPORT
)
{
this
->
buffer
[
i
].
fd_id
=
job
->
tcp_transport
->
get_conn_fd
();
}
else
if
(
job
->
transport_type
==
RDMA_RC_TRANSPORT
)
{
this
->
buffer
[
i
].
fd_id
=
job
->
rdma_transport
->
get_conn_fd
();
}
this
->
buffer
[
i
].
cr
=
cr
;
this
->
buffer
[
i
].
job
=
job
;
// this->ready_events[i] = 1;
found
=
true
;
break
;
}
}
return
found
?
0
:
-
1
;
}
void
Buffer
::
erase_element
(
int
fd
)
{
for
(
int
i
=
0
;
i
<
this
->
buffer_size
;
i
++
)
{
if
(
this
->
buffer
[
i
].
fd_id
!=
fd
)
continue
;
else
{
this
->
buffer
[
i
].
fd_id
=
-
1
;
this
->
buffer
[
i
].
in_use
=
false
;
this
->
buffer
[
i
].
cr
=
NULL
;
this
->
buffer
[
i
].
job
=
NULL
;
// this->ready_events[i] = -1;
break
;
}
}
return
;
}
void
Buffer
::
erase_element_by_pos
(
int
pos
)
{
this
->
buffer
[
pos
].
fd_id
=
-
1
;
this
->
buffer
[
pos
].
in_use
=
false
;
free
(
this
->
buffer
[
pos
].
cr
);
this
->
buffer
[
pos
].
cr
=
NULL
;
free
(
this
->
buffer
[
pos
].
job
);
this
->
buffer
[
pos
].
job
=
NULL
;
// this->ready_events[i] = -1;
return
;
}
int
Buffer
::
poll
(
int
fd
)
{
pthread_mutex_lock
(
&
(
this
->
buffer_lock
));
int
i
;
int
j
=
-
1
;
for
(
i
=
0
;
i
<
this
->
buffer_size
;
i
++
)
{
if
(
this
->
buffer
[
i
].
fd_id
==
fd
&&
!
this
->
buffer
[
i
].
in_use
)
{
this
->
buffer
[
i
].
in_use
=
true
;
j
=
i
;
break
;
}
}
pthread_mutex_unlock
(
&
(
this
->
buffer_lock
));
return
j
;
}
// Returns a copy of the slot at index pos.
//
// For an index outside the buffer (for example the -1 that poll() returns
// when nothing was found) a free, empty element is returned instead of
// indexing the vector out of range: fd_id is -1, in_use is false and both
// pointers are NULL.
Buffer_Element Buffer::get_element(int pos) {
    if (pos < 0 || (size_t)pos >= this->buffer.size()) {
        Buffer_Element empty;
        empty.fd_id = -1;
        empty.in_use = false;
        empty.cr = NULL;
        empty.job = NULL;
        return empty;
    }
    return this->buffer[pos];
}
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/Buffer.hpp
0 → 100644
View file @
d2d47b86
#ifndef __BUFFER_H__
#define __BUFFER_H__
#include <thread>
#include <vector>
#include "common.hpp"
#include "queue_context.hpp"
// One slot of a Buffer.
struct Buffer_Element {
    int fd_id;                  // using fd as an id for threads; -1 = free slot
    bool in_use;                // true once a caller of Buffer::poll(fd) claimed the slot
    struct Common_Request *cr;  // request stored in the slot, NULL when empty
    struct job_context *job;    // job stored in the slot, NULL when empty

    // Creates a free, empty slot. Every member gets a defined value so that
    // copying the element or passing its pointers to free() is always safe.
    Buffer_Element() : fd_id(-1), in_use(false), cr(NULL), job(NULL) {}
};
// Fixed-size table of Buffer_Element slots, keyed by connection descriptor.
class Buffer {
private:
    // Held by poll(int) while it scans the slots and claims one.
    pthread_mutex_t buffer_lock;

public:
    // std::vector<int> ready_events;

    size_t buffer_size;                  // number of slots, set by the constructor
    std::vector<Buffer_Element> buffer;  // the slots themselves

    Buffer(size_t buf_size);
    ~Buffer();

    // Stores job/cr in the first free slot; 0 on success, -1 when full.
    int add_element(int fd, job_context *job, Common_Request *cr);

    // Frees the first slot whose fd_id equals fd without releasing its pointers.
    void erase_element(int fd);

    // Frees the slot at index pos and passes its cr/job pointers to free().
    void erase_element_by_pos(int pos);

    // NOTE(review): no definition of this overload is visible in Buffer.cc.
    int poll();

    // Claims the first slot of fd that is not in use; returns its index or -1.
    int poll(int fd);

    // Returns a copy of the slot at index pos.
    Buffer_Element get_element(int pos);
};
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/ThreadsafeQueueTest.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <thread>
#include <pthread.h>
#include "queue_context.hpp"
#include "threadsafe_queue.hpp"
#include "../transport_api/transport_config.hpp"
using
namespace
std
;
Thread_Safe_Queue
q
;
void
work
(
int
a
)
{
struct
job_context
*
t
=
q
.
get_job
();
if
(
t
==
NULL
)
{
cout
<<
"Queue Empty"
<<
endl
;
return
;
}
cout
<<
"Got job"
<<
endl
;
RDMA_config
*
config
=
t
->
rdma_transport
->
get_config
();
cout
<<
config
->
mr
.
mr_size
<<
endl
;
}
int
main
()
{
RDMA_Transport
*
t
=
new
RDMA_Transport
();
cout
<<
q
.
queue_size
<<
endl
;
q
.
enqueue
(
t
,
RDMA_RC_WRITE
);
cout
<<
q
.
queue_size
<<
endl
;
thread
th
(
work
,
0
);
thread
th1
(
work
,
1
);
th
.
join
();
th1
.
join
();
return
0
;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/cli_api.cc
0 → 100644
View file @
d2d47b86
#ifndef __CLI_API_CC__
#define __CLI_API_CC__
#include <iostream>
#include <string>
#include <vector>
#include "cli_api.hpp"
namespace {
// Kept in an anonymous namespace so this strip_whitespaces does not clash
// with the function of the same name in read_config.

/**
 * Return `str` with leading and trailing ' ' characters removed.
 *
 * Only the space character is stripped (not tabs or newlines). A string that
 * is empty or consists solely of spaces yields "".
 *
 * The previous version dereferenced str.end() while looking for trailing
 * spaces, so trailing spaces were never actually removed.
 */
std::string strip_whitespaces(std::string str) {
    const std::string::size_type first = str.find_first_not_of(' ');
    if (first == std::string::npos) {
        return std::string();
    }
    const std::string::size_type last = str.find_last_not_of(' ');
    return str.substr(first, last - first + 1);
}
}
/**
 * Split `raw_str` into tokens separated by one or more ' ' characters.
 *
 * The input is first passed through strip_whitespaces(). Trailing '\n'
 * characters are removed from every token. A token that consisted only of
 * newlines and is followed by a space is still emitted as an empty string,
 * as before.
 *
 * Fixes relative to the previous version:
 *  - the newline-trimming loops no longer index an empty string
 *    (temp[temp.size() - 1] with size() == 0);
 *  - the space-skipping loop checks for end-of-string before dereferencing.
 */
std::vector<std::string> tokenize(std::string raw_str) {
    std::vector<std::string> tokens;
    std::string stripped_str = strip_whitespaces(raw_str);
    std::string temp;

    std::string::iterator it = stripped_str.begin();
    while (it != stripped_str.end()) {
        if (*it == ' ') {
            while (!temp.empty() && temp[temp.size() - 1] == '\n') {
                temp.pop_back();
            }
            tokens.push_back(temp);
            temp.clear();
            // Collapse a run of separators into one token boundary.
            while (it != stripped_str.end() && *it == ' ') {
                it++;
            }
        } else {
            temp.push_back(*it);
            it++;
        }
    }

    if (!temp.empty()) {
        while (!temp.empty() && temp[temp.size() - 1] == '\n') {
            temp.pop_back();
        }
        tokens.push_back(temp);
    }
    return tokens;
}
/**
 * Copy the key out of a "key\0value\0" blob.
 *
 * @param kv  Pointer to the start of the blob; must be NUL-terminated.
 * @return    The characters from `kv` up to, not including, the first '\0'.
 */
std::string get_key(char *kv) {
    return std::string(kv);
}
/**
 * Copy the value out of a "key\0value\0" blob.
 *
 * Skips past the key and its terminating '\0', then copies characters up to
 * the next '\0'.
 *
 * The previous version never advanced the cursor in the copy loop, so any
 * non-empty value made it append the same character forever.
 *
 * @param kv  Pointer to the start of the blob; both key and value must be
 *            NUL-terminated.
 * @return    The value as a string (empty if the value is empty).
 */
std::string get_val(char *kv) {
    const char *cursor = kv;
    while (*cursor != '\0') {
        ++cursor;
    }
    ++cursor;  // step over the key's terminator onto the first value byte
    return std::string(cursor);
}
/**
 * Split a "key\0value\0" blob into a (key, value) pair by calling get_key()
 * and get_val() on it.
 */
std::pair<std::string, std::string> get_kv(char *kv) {
    std::pair<std::string, std::string> result;
    result.first = get_key(kv);
    result.second = get_val(kv);
    return result;
}
/**
 * Return a pointer to the key inside a "key\0value\0" blob. The key sits at
 * the very start, so this is `blob` itself.
 */
char *get_key_ptr(char *blob) {
    char *key_start = blob;
    return key_start;
}
/**
 * Return a pointer to the value inside a "key\0value\0" blob, i.e. the byte
 * right after the first '\0'.
 */
char *get_val_ptr(char *blob) {
    char *cursor = blob;
    while (*cursor != '\0') {
        ++cursor;
    }
    return cursor + 1;
}
/**
 * Length of the key in a "key\0value\0" blob: the number of bytes before the
 * first '\0'.
 */
size_t get_key_size_from_blob(char *blob) {
    size_t len = 0;
    while (blob[len] != '\0') {
        ++len;
    }
    return len;
}
/**
 * Length of the value in a "key\0value\0" blob: the number of bytes between
 * the first '\0' and the second one.
 */
size_t get_val_size_from_blob(char *blob) {
    const char *val = blob;
    while (*val != '\0') {
        ++val;
    }
    ++val;  // first byte of the value

    size_t len = 0;
    while (val[len] != '\0') {
        ++len;
    }
    return len;
}
/**
 * Number of bytes from `key_ptr` up to, not including, the first '\0'.
 */
size_t get_key_size_from_start_ptr(char *key_ptr) {
    size_t len;
    for (len = 0; key_ptr[len] != '\0'; ++len) {
    }
    return len;
}
/**
 * Number of bytes from `val_ptr` up to, not including, the first '\0'.
 */
size_t get_val_size_from_start_ptr(char *val_ptr) {
    const char *end = val_ptr;
    while (*end != '\0') {
        ++end;
    }
    return static_cast<size_t>(end - val_ptr);
}
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/cli_api.hpp
0 → 100644
View file @
d2d47b86
#ifndef __CLI_API_H__
#define __CLI_API_H__
#include <string>
#include <vector>
// Split a command line into space-separated tokens.
std::vector<std::string> tokenize(std::string raw_str);

// Accessors for a "key\0value\0" blob, returning copies.
std::string get_key(char *kv);
std::string get_val(char *kv);
std::pair<std::string, std::string> get_kv(char *kv);

// Accessors for a "key\0value\0" blob, returning pointers into it.
char *get_key_ptr(char *blob);
char *get_val_ptr(char *blob);

// Key / value lengths measured from the start of the blob.
size_t get_key_size_from_blob(char *blob);
size_t get_val_size_from_blob(char *blob);

// Lengths measured from a pointer that already addresses the key / value.
size_t get_key_size_from_start_ptr(char *key_ptr);
size_t get_val_size_from_start_ptr(char *val_ptr);
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/cli_apiTest.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <cstring>
#include <string>
#include <vector>
#include "cli_api.hpp"
using
namespace
std
;
/**
 * Interactive check of tokenize(): read lines from stdin, echo them, and
 * print the first three tokens until "exit"/"EXIT" or end of input.
 *
 * Fixes relative to the previous version:
 *  - stops when getline() fails (EOF or over-long line) instead of looping
 *    forever on a dead stream;
 *  - blank lines and commands with fewer than three tokens no longer index
 *    past the end of `tokens` (missing arguments print as empty strings);
 *  - the line buffer lives on the stack, so nothing is leaked;
 *  - main returns a value.
 */
int main() {
    char ip[1024];
    vector<string> tokens;

    while (true) {
        memset(ip, 0, sizeof(ip));
        if (!cin.getline(ip, sizeof(ip))) {
            break;
        }
        printf("Got raw cmd: %s\n", ip);

        tokens = tokenize(string(ip));
        if (tokens.empty()) {
            continue;
        }
        if (tokens[0].compare("exit") == 0 || tokens[0].compare("EXIT") == 0) {
            printf("Got exit: %s\n", tokens[0].c_str());
            break;
        }

        const char *arg1 = tokens.size() > 1 ? tokens[1].c_str() : "";
        const char *arg2 = tokens.size() > 2 ? tokens[2].c_str() : "";
        printf("Command\t\tArg1\t\tArg2\n");
        printf("%s\t\t%s\t\t%s\n", tokens[0].c_str(), arg1, arg2);
    }
    return 0;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/client_functions.cc
0 → 100644
View file @
d2d47b86
#ifndef __CLIENT_FUNCTIONS_CC__
#define __CLIENT_FUNCTIONS_CC__
#include <chrono>
#include <stddef.h>
#include "common.hpp"
namespace
chrono
=
std
::
chrono
;
/**
 * TCP read request: not implemented yet. Ignores its arguments and
 * returns 0.
 */
int read_request(TCP_Transport *transport, char *key, size_t key_size) {
    (void)transport;
    (void)key;
    (void)key_size;
    return 0;  // ignore for now
}
/**
 * Issue a one-sided RDMA read on `transport`.
 *
 * @return 0 if one_sided_read() reports success (a false/zero result),
 *         -1 otherwise.
 */
int read_request(RDMA_Transport *transport) {
    return transport->one_sided_read() ? -1 : 0;
}
/**
 * Build a WRITE request packet and send it over TCP.
 *
 * Packet layout: [Common_Request header][key bytes]['\0'][val bytes].
 * The header's w_request.length is key_size + val_size, as before.
 *
 * Fixes relative to the previous version:
 *  - malloc failure is reported (-1) instead of being dereferenced;
 *  - the header is zeroed before the named fields are filled in, so no
 *    uninitialised bytes go on the wire (the const RDMA overload already
 *    zeroes its packet);
 *  - transport->mr / mr_size are restored after the send, so the transport
 *    is not left pointing at the freed packet.
 *
 * @return 0 on success, -1 if allocation or send_data() fails.
 */
int write_request(TCP_Transport *transport, char *key, size_t key_size, char *val, size_t val_size) {
    const size_t header_size = sizeof(struct Common_Request);
    const size_t final_size = header_size + key_size + 1 + val_size;

    char *final_packet = (char *)malloc(final_size);
    if (final_packet == NULL) {
        return -1;
    }
    memset(final_packet, 0, header_size);

    Common_Request *w = (Common_Request *)final_packet;
    w->opcode = WRITE;
    w->type = TYPE_REQUEST;
    w->req.w_request.common.opcode = WRITE;
    w->service_type = MASTER_SERVICE;
    w->req.w_request.length = key_size + val_size;

    char *payload = final_packet + header_size;
    memcpy(payload, key, key_size);
    payload[key_size] = '\0';
    memcpy(payload + key_size + 1, val, val_size);

    // send_data() transmits whatever mr / mr_size describe; lend it the
    // packet for the duration of the call only.
    auto saved_mr = transport->mr;
    auto saved_mr_size = transport->mr_size;
    transport->mr = final_packet;
    transport->mr_size = final_size;
    const int rc = transport->send_data() ? -1 : 0;
    transport->mr = saved_mr;
    transport->mr_size = saved_mr_size;

    free(final_packet);
    return rc;
}
/**
 * Build a WRITE request packet, copy it into the transport's memory region
 * with copy_to_mr(), and push it with a one-sided RDMA write.
 *
 * Packet layout: [Common_Request header][key bytes]['\0'][val bytes].
 * The header's w_request.length is key_size + val_size, as before.
 *
 * Fixes relative to the previous version:
 *  - malloc failure is reported (-1) instead of being dereferenced;
 *  - the header is zeroed before the named fields are filled in, so no
 *    uninitialised bytes are written to the remote side.
 *
 * @return 0 on success, -1 if allocation or one_sided_write() fails.
 */
int write_request(RDMA_Transport *transport, char *key, size_t key_size, char *val, size_t val_size) {
    const size_t header_size = sizeof(struct Common_Request);
    const size_t final_size = header_size + key_size + 1 + val_size;

    char *final_packet = (char *)malloc(final_size);
    if (final_packet == NULL) {
        return -1;
    }
    memset(final_packet, 0, header_size);

    Common_Request *w = (Common_Request *)final_packet;
    w->opcode = WRITE;
    w->type = TYPE_REQUEST;
    w->req.w_request.common.opcode = WRITE;
    w->service_type = MASTER_SERVICE;
    w->req.w_request.length = key_size + val_size;

    char *payload = final_packet + header_size;
    memcpy(payload, key, key_size);
    payload[key_size] = '\0';
    memcpy(payload + key_size + 1, val, val_size);

    transport->copy_to_mr(final_packet, final_size);
    const int rc = transport->one_sided_write() ? -1 : 0;

    free(final_packet);
    return rc;
}
/**
 * Build a WRITE request packet and send it over TCP (const-pointer overload).
 *
 * Packet layout: [Common_Request header][key bytes]['\0'][val bytes].
 * The header's w_request.length is key_size + val_size, as before.
 *
 * Fixes relative to the previous version:
 *  - malloc failure is reported (-1) instead of being dereferenced;
 *  - the header is zeroed before the named fields are filled in, so no
 *    uninitialised bytes go on the wire;
 *  - transport->mr / mr_size are restored after the send, so the transport
 *    is not left pointing at the freed packet.
 *
 * @return 0 on success, -1 if allocation or send_data() fails.
 */
int write_request(TCP_Transport *transport, const char *key, size_t key_size, const char *val, size_t val_size) {
    const size_t header_size = sizeof(struct Common_Request);
    const size_t final_size = header_size + key_size + 1 + val_size;

    char *final_packet = (char *)malloc(final_size);
    if (final_packet == NULL) {
        return -1;
    }
    memset(final_packet, 0, header_size);

    Common_Request *w = (Common_Request *)final_packet;
    w->opcode = WRITE;
    w->type = TYPE_REQUEST;
    w->req.w_request.common.opcode = WRITE;
    w->service_type = MASTER_SERVICE;
    w->req.w_request.length = key_size + val_size;

    char *payload = final_packet + header_size;
    memcpy(payload, key, key_size);
    payload[key_size] = '\0';
    memcpy(payload + key_size + 1, val, val_size);

    // send_data() transmits whatever mr / mr_size describe; lend it the
    // packet for the duration of the call only.
    auto saved_mr = transport->mr;
    auto saved_mr_size = transport->mr_size;
    transport->mr = final_packet;
    transport->mr_size = final_size;
    const int rc = transport->send_data() ? -1 : 0;
    transport->mr = saved_mr;
    transport->mr_size = saved_mr_size;

    free(final_packet);
    return rc;
}
/**
 * Build a WRITE request packet, copy it to the address returned by
 * get_mr_addr(), and push it with a one-sided RDMA write (const-pointer
 * overload).
 *
 * Packet layout: [Common_Request header][key bytes]['\0'][val bytes]['\0'].
 * The header's w_request.length is key_size + 1 + val_size + 1, and
 * request_start_time is stamped with the steady clock.
 *
 * Fix relative to the previous version: a failed malloc is reported (-1)
 * instead of being passed to memset().
 *
 * NOTE(review): final_size is not checked against the size of the memory
 * region behind get_mr_addr(); confirm that callers bound key/value sizes.
 *
 * @return 0 on success, -1 if allocation or one_sided_write() fails.
 */
int write_request(RDMA_Transport *transport, const char *key, size_t key_size, const char *val, size_t val_size) {
    const size_t header_size = sizeof(struct Common_Request);
    const size_t final_size = header_size + key_size + 1 + val_size + 1;

    char *final_packet = (char *)malloc(final_size);
    if (final_packet == NULL) {
        return -1;
    }
    // Zeroing the whole packet also supplies both '\0' terminators.
    memset(final_packet, 0, final_size);

    Common_Request *w = (Common_Request *)final_packet;
    w->opcode = WRITE;
    w->type = TYPE_REQUEST;
    w->req.w_request.common.opcode = WRITE;
    w->service_type = MASTER_SERVICE;
    w->req.w_request.length = key_size + 1 + val_size + 1;
    w->request_start_time = chrono::steady_clock::now();

    char *payload = final_packet + header_size;
    memcpy(payload, key, key_size);
    payload[key_size] = '\0';
    memcpy(payload + key_size + 1, val, val_size);

    memcpy(transport->get_mr_addr(), final_packet, final_size);
    const int rc = transport->one_sided_write() ? -1 : 0;

    free(final_packet);
    return rc;
}
/**
 * TCP read RPC (char* key): not implemented yet. Ignores its arguments and
 * returns 0.
 */
int read_rpc(TCP_Transport *transport, char *key, size_t key_size) {
    (void)transport;
    (void)key;
    (void)key_size;
    return 0;  // pass for now
}
/**
 * RDMA read RPC (non-const char* key): not implemented yet. Ignores its
 * arguments and returns 0. Only the const char* overload performs a read.
 */
int read_rpc(RDMA_Transport *transport, char *key, size_t key_size) {
    (void)transport;
    (void)key;
    (void)key_size;
    return 0;  // pass for now
}
/**
 * Read the value for `key` from the master over RDMA.
 *
 * Sequence:
 *  1. build a READ request (header + key + '\0') and copy it to the address
 *     returned by get_mr_addr();
 *  2. send the header over the transport's TCP connection;
 *  3. wait for the master's prep request on that connection;
 *  4. perform a one-sided read via read_request();
 *  5. send a STATUS_OK response header back over TCP.
 *
 * Fixes relative to the previous version:
 *  - the request packet is freed (it was leaked on every call);
 *  - malloc failure is reported instead of being passed to memset();
 *  - the ack is zeroed before its fields are set, so no uninitialised stack
 *    bytes are sent;
 *  - a failed one-sided read is reported to the caller (-1); the ack is
 *    still sent unchanged so the message exchange is not altered;
 *  - the value string is built only when `debug` is set, which is the only
 *    place it is used.
 *
 * NOTE(review): the results of send_data()/recv_data() are still ignored
 * because their return convention is not visible here.
 *
 * @return 0 on success, -1 if `key` is NULL, allocation fails, or the
 *         one-sided read fails.
 */
int read_rpc(RDMA_Transport *transport, const char *key, size_t key_size) {
    // first if key is not null
    if (key == NULL) {
        return -1;
    }

    // key could be valid, send req to master to initiate read
    const size_t final_size = sizeof(Common_Request) + key_size + 1;
    char *final_packet = (char *)malloc(final_size);
    if (final_packet == NULL) {
        return -1;
    }
    memset(final_packet, 0, final_size);

    Common_Request *cr = (Common_Request *)final_packet;
    cr->opcode = READ;
    cr->service_type = MASTER_SERVICE;
    cr->type = TYPE_REQUEST;
    cr->req.r_request.common.opcode = READ;
    cr->req.r_request.common.service = MASTER_SERVICE;
    cr->req.r_request.keyLength = key_size;
    memcpy(final_packet + sizeof(Common_Request), key, key_size);
    memcpy(transport->get_mr_addr(), final_packet, final_size);

    // send main initiating request
    TCP_Transport *tcp_transport = transport->get_tcp_conn();
    tcp_transport->send_data((char *)cr, sizeof(Common_Request));
    free(final_packet);  // the packet has been copied to the MR and sent

    // wait for master to send prep request for reading
    Common_Request prep_request;
    tcp_transport->recv_data((char *)&prep_request, sizeof(Common_Request));

    // one sided read from MASTER MR
    const int read_status = read_request(transport);

    // send ack to MASTER
    Common_Request ack;
    memset((void *)&ack, 0, sizeof(ack));
    ack.opcode = READ;
    ack.service_type = MASTER_SERVICE;
    ack.type = TYPE_RESPONSE;
    ack.req.r_response.common.status = STATUS_OK;
    ack.req.r_response.length = 0;

    if (debug) {
        std::string value(transport->get_mr_addr() + sizeof(Common_Request));
        printf("value read is: %s\n", value.c_str());
    }
    tcp_transport->send_data((char *)&ack, sizeof(ack));

    return read_status == 0 ? 0 : -1;
}
/**
 * TCP read RPC (std::string key): not implemented yet. Ignores its
 * arguments and returns 0.
 */
int read_rpc(TCP_Transport *transport, std::string key, size_t key_size) {
    (void)transport;
    (void)key;
    (void)key_size;
    return 0;  // pass for now
}
/**
 * RDMA read RPC (std::string key): not implemented yet. Ignores its
 * arguments and returns 0.
 */
int read_rpc(RDMA_Transport *transport, std::string key, size_t key_size) {
    (void)transport;
    (void)key;
    (void)key_size;
    return 0;  // pass for now
}
/**
 * Send a WRITE request over TCP and wait for the acknowledgement.
 *
 * Fix relative to the previous version: if write_request() fails, the
 * function returns -1 immediately instead of blocking in recv_data() for an
 * ack to a request that was never sent.
 *
 * NOTE(review): success is decided by comparing ack.opcode with STATUS_OK
 * (0), and the ack is zero-filled before recv_data(); a receive that writes
 * nothing would therefore look like success. Kept as-is because the server's
 * ack format is not visible here.
 *
 * @return 0 if the ack's opcode equals STATUS_OK, -1 otherwise.
 */
int write_rpc(TCP_Transport *transport, char *key, size_t key_size, char *val, size_t val_size) {
    if (write_request(transport, key, key_size, val, val_size) != 0) {
        return -1;
    }

    // check ack
    Common_Request ack;
    memset((void *)&ack, 0, sizeof(Common_Request));
    transport->recv_data((char *)&ack, sizeof(ack));
    return (ack.opcode != STATUS_OK) ? -1 : 0;
}
/**
 * Write key/value to the master over RDMA, notify it over the transport's
 * TCP connection, and wait for the acknowledgement.
 *
 * Fixes relative to the previous version:
 *  - if write_request() fails, returns -1 without sending the notification
 *    or waiting for an ack;
 *  - the notification header now sets type = TYPE_REQUEST, as the const
 *    overload of write_rpc and write_send_request already do.
 *
 * NOTE(review): success is decided by comparing ack.opcode with STATUS_OK
 * (0) on a zero-filled ack; kept as-is because the server's ack format is
 * not visible here.
 *
 * @return 0 if the ack's opcode equals STATUS_OK, -1 otherwise.
 */
int write_rpc(RDMA_Transport *transport, char *key, size_t key_size, char *val, size_t val_size) {
    TCP_Transport *tcp_transport = transport->get_tcp_conn();

    Common_Request cr;
    memset((void *)&cr, 0, sizeof(cr));
    cr.opcode = WRITE;
    cr.type = TYPE_REQUEST;
    cr.req.w_request.common.opcode = WRITE;
    cr.req.w_request.length = 0;
    cr.service_type = MASTER_SERVICE;
    cr.req.w_request.common.service = MASTER_SERVICE;

    if (write_request(transport, key, key_size, val, val_size) != 0) {
        return -1;
    }

    // set_mr does a malloc, so the buffer/length overload is used instead
    tcp_transport->send_data((char *)&cr, sizeof(cr));

    // check ack
    Common_Request ack;
    memset((void *)&ack, 0, sizeof(ack));
    tcp_transport->recv_data((char *)&ack, sizeof(ack));
    return (ack.opcode != STATUS_OK) ? -1 : 0;
}
/**
 * Send a WRITE request over TCP and wait for the acknowledgement
 * (const-pointer overload).
 *
 * Fix relative to the previous version: if write_request() fails, the
 * function returns -1 immediately instead of blocking in recv_data() for an
 * ack to a request that was never sent.
 *
 * NOTE(review): success is decided by comparing ack.opcode with STATUS_OK
 * (0) on a zero-filled ack; kept as-is because the server's ack format is
 * not visible here.
 *
 * @return 0 if the ack's opcode equals STATUS_OK, -1 otherwise.
 */
int write_rpc(TCP_Transport *transport, const char *key, size_t key_size, const char *val, size_t val_size) {
    if (write_request(transport, key, key_size, val, val_size) != 0) {
        return -1;
    }

    // check ack
    Common_Request ack;
    memset((void *)&ack, 0, sizeof(Common_Request));
    transport->recv_data((char *)&ack, sizeof(ack));
    return (ack.opcode != STATUS_OK) ? -1 : 0;
}
/**
 * Write key/value to the master over RDMA, notify it over the transport's
 * TCP connection, and wait for the acknowledgement (const-pointer overload).
 *
 * When `analyze` is set, the time from just before write_request() to just
 * after the ack is appended to client_rtt_time.
 *
 * Fix relative to the previous version: if write_request() fails, returns
 * -1 without sending the notification or waiting for an ack.
 *
 * NOTE(review): success is decided by comparing ack.opcode with STATUS_OK
 * (0) on a zero-filled ack; kept as-is because the server's ack format is
 * not visible here.
 *
 * @return 0 if the ack's opcode equals STATUS_OK, -1 otherwise.
 */
int write_rpc(RDMA_Transport *transport, const char *key, size_t key_size, const char *val, size_t val_size) {
    TCP_Transport *tcp_transport = transport->get_tcp_conn();

    Common_Request cr;
    memset((void *)&cr, 0, sizeof(cr));
    cr.opcode = WRITE;
    cr.type = TYPE_REQUEST;
    cr.req.w_request.common.opcode = WRITE;
    cr.req.w_request.length = 0;
    cr.service_type = MASTER_SERVICE;
    cr.req.w_request.common.service = MASTER_SERVICE;

    // Only consumed when `analyze` is set.
    const auto start_time = chrono::steady_clock::now();

    if (write_request(transport, key, key_size, val, val_size) != 0) {
        return -1;
    }

    // set_mr does a malloc, so the buffer/length overload is used instead
    tcp_transport->send_data((char *)&cr, sizeof(cr));

    // check ack
    Common_Request ack;
    memset((void *)&ack, 0, sizeof(ack));
    tcp_transport->recv_data((char *)&ack, sizeof(ack));
    if (ack.opcode != STATUS_OK) {
        return -1;
    }

    const auto end_time = chrono::steady_clock::now();
    if (analyze) {
        client_rtt_time.push_back(end_time - start_time);
    }
    return 0;
}
/**
 * First half of an asynchronous write: push key/value to the master over
 * RDMA and send the notification header over the transport's TCP
 * connection, without waiting for the ack (see write_get_response).
 *
 * The header's request_start_time is stamped with the steady clock before
 * the RDMA write.
 *
 * Fixes relative to the previous version:
 *  - if write_request() fails, returns -1 without sending the notification;
 *  - the unused local `ack` is removed.
 *
 * @return 0 once the notification has been handed to send_data(), -1 if
 *         write_request() fails.
 */
int write_send_request(RDMA_Transport *transport, const char *key, size_t key_size, const char *val, size_t val_size) {
    TCP_Transport *tcp_transport = transport->get_tcp_conn();

    Common_Request cr;
    memset((void *)&cr, 0, sizeof(cr));
    cr.opcode = WRITE;
    cr.type = TYPE_REQUEST;
    cr.req.w_request.common.opcode = WRITE;
    cr.req.w_request.length = 0;
    cr.service_type = MASTER_SERVICE;
    cr.req.w_request.common.service = MASTER_SERVICE;
    cr.request_start_time = chrono::steady_clock::now();

    if (write_request(transport, key, key_size, val, val_size) != 0) {
        return -1;
    }

    // set_mr does a malloc, so the buffer/length overload is used instead
    tcp_transport->send_data((char *)&cr, sizeof(cr));
    return 0;
}
/**
 * Second half of an asynchronous write: receive one ack header from the
 * transport's TCP connection.
 *
 * When `analyze` is set and the ack is accepted, the difference between the
 * current steady-clock time and the ack's request_start_time is appended to
 * client_rtt_time.
 *
 * @return 0 if the ack's opcode equals STATUS_OK, -1 otherwise.
 */
int write_get_response(RDMA_Transport *transport) {
    auto finished_at = chrono::steady_clock::now();
    TCP_Transport *tcp_conn = transport->get_tcp_conn();

    Common_Request ack;
    memset((void *)&ack, 0, sizeof(ack));
    tcp_conn->recv_data((char *)&ack, sizeof(ack));

    const bool accepted = (ack.opcode == STATUS_OK);
    if (!accepted) {
        return -1;  // error
    }

    // success
    if (analyze) {
        finished_at = chrono::steady_clock::now();
        client_rtt_time.push_back(finished_at - ack.request_start_time);
    }
    return 0;
}
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/client_functions.hpp
0 → 100644
View file @
d2d47b86
#ifndef __CLIENT_FUNCTIONS_H__
#define __CLIENT_FUNCTIONS_H__
#include <stddef.h>
#include "../transport_api/transport_config.hpp"
// int read_request(TCP_Transport *transport, char* key, size_t key_size);
// int read_request(RDMA_Transport *transport, char* key, size_t key_size);
// int read_request(TCP_Transport *transport, const char* key, size_t key_size);
// int read_request(RDMA_Transport *transport, const char* key, size_t key_size);
// One-sided RDMA read; 0 on success, -1 on failure.
int read_request(RDMA_Transport *transport);

// Build and send a WRITE request packet; 0 on success, -1 on failure.
int write_request(TCP_Transport *transport, char *key, size_t key_size, char *val, size_t val_size);
int write_request(RDMA_Transport *transport, char *key, size_t key_size, char *val, size_t val_size);
int write_request(TCP_Transport *transport, const char *key, size_t key_size, const char *val, size_t val_size);
int write_request(RDMA_Transport *transport, const char *key, size_t key_size, const char *val, size_t val_size);

// Read RPCs. Only the (RDMA_Transport*, const char*) overload is implemented
// in client_functions.cc; the others are placeholders that return 0.
int read_rpc(TCP_Transport *transport, char *key, size_t key_size);
int read_rpc(RDMA_Transport *transport, char *key, size_t key_size);
int read_rpc(RDMA_Transport *transport, const char *key, size_t key_size);
int read_rpc(TCP_Transport *transport, std::string key, size_t key_size);
int read_rpc(RDMA_Transport *transport, std::string key, size_t key_size);

// Write RPCs: send the request, then wait for the ack; 0 on success, -1 on error.
int write_rpc(TCP_Transport *transport, char *key, size_t key_size, char *val, size_t val_size);
int write_rpc(RDMA_Transport *transport, char *key, size_t key_size, char *val, size_t val_size);
int write_rpc(TCP_Transport *transport, const char *key, size_t key_size, const char *val, size_t val_size);
int write_rpc(RDMA_Transport *transport, const char *key, size_t key_size, const char *val, size_t val_size);

// Split write: send the request now, collect the ack later.
int write_send_request(RDMA_Transport *transport, const char *key, size_t key_size, const char *val, size_t val_size);
int write_get_response(RDMA_Transport *transport);
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/common.cc
0 → 100644
View file @
d2d47b86
#ifndef __COMMON_CC__
#define __COMMON_CC__
#include <atomic>
#include <chrono>
#include <string>
#include <unordered_map>
#include "Buffer.hpp"
#include "../transport_api/transport_config.hpp"
#include "threadsafe_queue.hpp"
#include "log.hpp"
#include "common.hpp"
namespace
chrono
=
std
::
chrono
;
// TIME MEASUREMENT STRUCTURES
// One vector of samples per measured stage. chrono::duration<double> counts
// seconds.
std::vector<chrono::duration<double>> request_queue_wait_time;
std::vector<chrono::duration<double>> response_buffer_wait_time;
std::vector<chrono::duration<double>> send_queue_wait_time;
std::vector<chrono::duration<double>> client_rtt_time;
std::vector<chrono::duration<double>> worker_read_service_time;
std::vector<chrono::duration<double>> worker_write_service_time;
std::vector<chrono::duration<double>> append_log_time;
std::vector<chrono::duration<double>> master_backup_ack_time;
std::vector<chrono::duration<double>> rdma_one_sided_write_time;
std::vector<chrono::duration<double>> rdma_one_sided_read_time;

// Accumulators matching the vectors above (presumably running totals of the
// samples; they are only initialised here).
double request_queue_wait_time_sum = 0.0;
double response_buffer_wait_time_sum = 0.0;
double send_queue_wait_time_sum = 0.0;
double client_rtt_time_sum = 0.0;
double worker_read_service_time_sum = 0.0;
double worker_write_service_time_sum = 0.0;
double append_log_time_sum = 0.0;
double master_backup_ack_time_sum = 0.0;
double rdma_one_sided_write_time_sum = 0.0;
double rdma_one_sided_read_time_sum = 0.0;
//

// LOG STRUCTURE DECLARATION
Log storage_log;

std::atomic<int> req_cnt(0);
// this is till implementation of clean exit is complete
int max_req = 500;

bool debug = false;    // gates diagnostic printf output
bool analyze = false;  // gates collection of timing samples
int max_packet_size_bytes = 0;

chrono::duration<double> max_cq_poll_timeout(0.0);
// 0.05 s (50 ms), static for now (300 for testing)
chrono::duration<double> response_buffer_add_timeout(0.05);
// 0.05 s (50 ms), static for now (300 for testing)
chrono::duration<double> worker_response_buffer_poll_timeout(0.05);

std::unordered_map<uint32_t, RDMA_Transport*> common::rdma_transport_map;
std::unordered_map<uint32_t, TCP_Transport*> common::tcp_transport_map;

Thread_Safe_Queue* job_queue = new Thread_Safe_Queue();
ThreadSafe_Queue<struct job_context*>* send_queue = new ThreadSafe_Queue<struct job_context*>();
ThreadSafe_Queue<struct job_context*>* request_queue = new ThreadSafe_Queue<struct job_context*>();
Buffer* response_buffer = new Buffer(response_buffer_size);

int num_servers;
int num_replicas;
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/common.hpp
0 → 100644
View file @
d2d47b86
#ifndef __COMMON_H__
#define __COMMON_H__
#include <atomic>
#include <unordered_map>
#include <chrono>
#include <byteswap.h>
#include <stdint.h>
#include "Buffer.hpp"
#include "threadsafe_queue.hpp"
#include "general_threadsafe_queue.hpp"
#include "../transport_api/transport_config.hpp"
#include "log.hpp"
/*
 * 64-bit host <-> network (big-endian) byte-order conversion.
 *
 * Endianness is taken from the compiler-predefined __BYTE_ORDER__ macro.
 * The previous test, `__BYTE_ORDER == LITTLE_ENDIAN`, relied on macros that
 * only exist when <endian.h> has been pulled in; if neither was defined the
 * preprocessor evaluated 0 == 0 and silently chose the little-endian branch.
 */
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
    __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
static inline uint64_t htonll(uint64_t x) { return bswap_64(x); }
static inline uint64_t ntohll(uint64_t x) { return bswap_64(x); }
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
static inline uint64_t htonll(uint64_t x) { return x; }
static inline uint64_t ntohll(uint64_t x) { return x; }
#else
#error __BYTE_ORDER__ is neither __ORDER_LITTLE_ENDIAN__ nor __ORDER_BIG_ENDIAN__
#endif
namespace
chrono
=
std
::
chrono
;
// Role of the running process.
enum Entity {
    NIC = 0,
    SERVER = 1,
    CLIENT = 2,
};

// Role of this process; defined outside this header.
extern enum Entity self_id;
// const bool debug = true;
// const chrono::duration<double> max_poll_cq_timeout(0.005); //5 ms
// const int MAX_PACKET_SIZE = ( 1<<21 );
// TIME MEASUREMENT STRUCTURES
// Timing sample vectors (chrono::duration<double> counts seconds).
extern std::vector<chrono::duration<double>> request_queue_wait_time;
extern std::vector<chrono::duration<double>> response_buffer_wait_time;
extern std::vector<chrono::duration<double>> send_queue_wait_time;
extern std::vector<chrono::duration<double>> client_rtt_time;
extern std::vector<chrono::duration<double>> worker_read_service_time;
extern std::vector<chrono::duration<double>> worker_write_service_time;
extern std::vector<chrono::duration<double>> append_log_time;
extern std::vector<chrono::duration<double>> master_backup_ack_time;
extern std::vector<chrono::duration<double>> rdma_one_sided_write_time;
extern std::vector<chrono::duration<double>> rdma_one_sided_read_time;

// Accumulators matching the vectors above.
extern double request_queue_wait_time_sum;
extern double response_buffer_wait_time_sum;
extern double send_queue_wait_time_sum;
extern double client_rtt_time_sum;
extern double worker_read_service_time_sum;
extern double worker_write_service_time_sum;
extern double append_log_time_sum;
extern double master_backup_ack_time_sum;
extern double rdma_one_sided_write_time_sum;
extern double rdma_one_sided_read_time_sum;
//

// LOG STRUCTURE DECLARATION
extern struct Log storage_log;

extern std::atomic<int> req_cnt;
// this is till implementation of clean exit is complete
extern int max_req;

const size_t max_ip_cmd_len = 1024;
// for batch mode
const size_t response_buffer_size = 100;

extern bool debug;
extern bool analyze;

extern chrono::duration<double> max_cq_poll_timeout;
extern chrono::duration<double> response_buffer_add_timeout;
extern chrono::duration<double> worker_response_buffer_poll_timeout;
extern int max_packet_size_bytes;

extern class Thread_Safe_Queue* job_queue;
extern class ThreadSafe_Queue<struct job_context*>* send_queue;
extern class ThreadSafe_Queue<struct job_context*>* request_queue;
extern class Buffer* response_buffer;

extern int num_servers;
extern int num_replicas;
//these are the conn objs
//this will do for testing and benchmarking
namespace common {
// Connection objects, keyed by a 32-bit id.
extern std::unordered_map<uint32_t, RDMA_Transport*> rdma_transport_map;
extern std::unordered_map<uint32_t, TCP_Transport*> tcp_transport_map;
}
// Just for convenience: host name -> IPv4 address of the test machines.
// static so it doesn't give segfault before main
// (every translation unit that includes this header gets its own copy).
static std::unordered_map<std::string, std::string> machine_allocation_ips = {
    {"ub-02",     "192.168.200.30"},
    {"ub-02-nic", "192.168.200.31"},
    {"ub-04",     "192.168.200.50"},
    {"ub-04-nic", "192.168.200.51"},
    {"ub-05",     "192.168.200.20"},
    {"ub-05-nic", "192.168.200.21"},
    {"ub-08",     "192.168.200.40"},
    {"ub-08-nic", "192.168.200.41"}
};
/**
 * Symbolic names for the status values that RAMCloud operations return to
 * applications.
 *
 * 0 means success; every other value is an error. Not every status can be
 * returned by every operation.
 */
typedef enum Status {
    /// Default return value when an operation was successful.
    STATUS_OK                       = 0,
    /// The server does not know about (and is not responsible for) the given
    /// tablet, but it may exist elsewhere in the system. Preferred over the
    /// definitive TABLE_DOESNT_EXIST whenever the tablet might live on
    /// another server.
    STATUS_UNKNOWN_TABLET           = 1,
    /// The table does not exist anywhere in the system. At present only the
    /// coordinator can say this with certainty.
    STATUS_TABLE_DOESNT_EXIST       = 2,
    /// The object does not exist anywhere in the system. There is no
    /// UNKNOWN_OBJECT counterpart: servers reject operations on objects in
    /// unknown tables with a table-related status, and a server that owns a
    /// tablet knows for certain whether an object exists in it.
    STATUS_OBJECT_DOESNT_EXIST      = 3,
    STATUS_OBJECT_EXISTS            = 4,
    STATUS_WRONG_VERSION            = 5,
    STATUS_NO_TABLE_SPACE           = 6,
    STATUS_MESSAGE_TOO_SHORT        = 7,
    STATUS_UNIMPLEMENTED_REQUEST    = 8,
    STATUS_REQUEST_FORMAT_ERROR     = 9,
    STATUS_RESPONSE_FORMAT_ERROR    = 10,
    STATUS_COULDNT_CONNECT          = 11,
    STATUS_BACKUP_BAD_SEGMENT_ID    = 12,
    /// Returned by backups when they cannot (or do not wish to) allocate
    /// space for a segment replica.
    STATUS_BACKUP_OPEN_REJECTED     = 13,
    STATUS_BACKUP_SEGMENT_OVERFLOW  = 14,
    STATUS_BACKUP_MALFORMED_SEGMENT = 15,
    STATUS_SEGMENT_RECOVERY_FAILED  = 16,
    /// The server cannot handle the request right now; the caller should
    /// retry later. Returned in many situations, e.g. (a) the server is out
    /// of resources to execute the request, or (b) the server is not sure it
    /// has authority to execute the request and is checking with the
    /// coordinator.
    STATUS_RETRY                    = 17,
    /// The RPC requested an unknown service.
    STATUS_SERVICE_NOT_AVAILABLE    = 18,
    STATUS_TIMEOUT                  = 19,
    /// The server an RPC is directed to never existed, has come and gone, or
    /// is currently crashed. It cannot respond to RPCs and probably never
    /// will again (unless the id has not existed yet; once a server crashes
    /// its id is never reused).
    STATUS_SERVER_NOT_UP            = 20,
    STATUS_INTERNAL_ERROR           = 21,
    /// The object chosen for an operation does not meet the associated
    /// requirements and is therefore invalid.
    STATUS_INVALID_OBJECT           = 22,
    /// A tablet does not exist. Relevant when split or merge operations on
    /// tablets are executed.
    STATUS_TABLET_DOESNT_EXIST      = 23,
    /// The tablet-partitioning logic was invoked without a preceding call to
    /// start reading replicas off of disk.
    STATUS_PARTITION_BEFORE_READ    = 24,
    /// An RPC intended for one server id was actually sent to a different
    /// server id.
    STATUS_WRONG_SERVER             = 25,
    /// The server sending an RPC is not in the recipient's server list.
    /// Helps servers discover that they are zombies (the rest of the cluster
    /// thinks the zombie is dead while it thinks it is alive), so they stop
    /// servicing requests once other servers have taken over their tablets.
    /// See "Zombies" in designNotes.
    STATUS_CALLER_NOT_IN_CLUSTER    = 26,
    /// A single request was too big to fit in an rpc and could not be
    /// sent/carried out.
    STATUS_REQUEST_TOO_LARGE        = 27,
    /// The server does not know about (and is not responsible for) the given
    /// indexlet, but it may exist elsewhere in the system. Preferred over
    /// the definitive INDEX_DOESNT_EXIST whenever the indexlet might live on
    /// another server.
    STATUS_UNKNOWN_INDEXLET         = 28,
    /// The index does not exist anywhere in the system. At present only the
    /// coordinator can say this with certainty.
    STATUS_INDEX_DOESNT_EXIST       = 29,
    /// A parameter provided by the client is invalid (for example, outside
    /// the allowed bounds).
    STATUS_INVALID_PARAMETER        = 30,
    /// The client already received the result of the rpc, so executing it
    /// again makes no sense. Most likely cause: a delayed network packet.
    STATUS_STALE_RPC                = 31,
    /// The client's lease has expired on the coordinator; the master refused
    /// to execute the RPC with an expired lease.
    STATUS_EXPIRED_LEASE            = 32,
    /// The client tried to perform transaction operations after the
    /// transaction commit had already started.
    STATUS_TX_OP_AFTER_COMMIT       = 33,

    STATUS_MAX_VALUE                = 33,
    // Note: if you add a new status value you must make the following
    // additional updates:
    // * Modify STATUS_MAX_VALUE to have a value equal to the largest
    //   defined status value, and make sure its definition is the last one
    //   in the list. STATUS_MAX_VALUE is used primarily for testing.
    // * Add new entries in the tables "messages" and "symbols" in Status.cc.
    // * Add a new exception class to ClientException.h
    // * Add a new "case" to ClientException::throwException to map from
    //   the status value to a status-specific ClientException subclass.
    // * In the Java bindings, add a static class for the exception to
    //   ClientException.java
    // * Add a case for the status of the exception to throw the exception in
    //   ClientException.java
    // * Add the exception to the Status enum in Status.java, making
    //   sure the status is in the correct position corresponding to its
    //   status code.
} Status;
/**
 * Conditions under which a conditional operation must be aborted with an
 * error.
 *
 * RejectRules are typically used to keep updates consistent, e.g. "update
 * this value only if it has not changed since it was last read". When a
 * RejectRules object is passed to an operation, the operation is aborted if
 * any of the following holds:
 *  - doesntExist is nonzero and the object does not exist
 *  - exists is nonzero and the object does exist
 *  - versionLeGiven is nonzero and the object exists with a version less
 *    than or equal to givenVersion
 *  - versionNeGiven is nonzero and the object exists with a version
 *    different from givenVersion
 */
struct RejectRules {
    uint64_t givenVersion;
    uint8_t  doesntExist;
    uint8_t  exists;
    uint8_t  versionLeGiven;
    uint8_t  versionNeGiven;
} __attribute__((packed));
/**
 * Selects the service that will handle a given rpc.
 * An rpc may only be sent to one particular service; see ServiceMask for
 * situations dealing with sets of services on a particular Server.
 */
enum ServiceType {
    MASTER_SERVICE      = 0,
    BACKUP_SERVICE      = 1,
    COORDINATOR_SERVICE = 2,
    ADMIN_SERVICE       = 3,
    INVALID_SERVICE     = 4,  // One higher than the max.
};
/**
 * Lease information carried by linearizable RPCs so the receiver can decide
 * whether the RPC may be processed. Packed: it is embedded in RPC headers.
 */
struct ClientLease {
    /// Cluster-unique id of this lease; 0 means invalid or expired.
    uint64_t leaseId;
    /// Cluster time after which the lease may have become invalid.
    uint64_t leaseExpiration;
    /// Cluster time at which the coordinator provided this lease information.
    uint64_t timestamp;
} __attribute__((packed));
/**
 * Values for the "opcode" field of RPC headers; each value selects one
 * operation. Every RAMCloud service implements a subset of them. When this
 * table changes, the following places must be updated as well:
 *  - The method opcodeSymbol in WireFormat.cc.
 *  - WireFormatTest.cc's out-of-range test, if ILLEGAL_RPC_TYPE was changed.
 *  - Possibly the "callees" table in scripts/genLevels.py, which tracks
 *    which RPCs invoke which other RPCs.
 *
 * The numbering has gaps (27, 30, 34, 37, 38, 41, 54 are unused here); the
 * explicit values are part of the wire format and must not be renumbered.
 */
enum Opcode {
    PING                             = 7,
    PROXY_PING                       = 8,
    KILL                             = 9,
    CREATE_TABLE                     = 10,
    GET_TABLE_ID                     = 11,
    DROP_TABLE                       = 12,
    READ                             = 13,
    WRITE                            = 14,
    REMOVE                           = 15,
    ENLIST_SERVER                    = 16,
    GET_SERVER_LIST                  = 17,
    GET_TABLE_CONFIG                 = 18,
    RECOVER                          = 19,
    HINT_SERVER_CRASHED              = 20,
    RECOVERY_MASTER_FINISHED         = 21,
    ENUMERATE                        = 22,
    SET_MASTER_RECOVERY_INFO         = 23,
    FILL_WITH_TEST_DATA              = 24,
    MULTI_OP                         = 25,
    GET_METRICS                      = 26,
    BACKUP_FREE                      = 28,
    BACKUP_GETRECOVERYDATA           = 29,
    BACKUP_STARTREADINGDATA          = 31,
    BACKUP_WRITE                     = 32,
    BACKUP_RECOVERYCOMPLETE          = 33,
    UPDATE_SERVER_LIST               = 35,
    BACKUP_STARTPARTITION            = 36,
    DROP_TABLET_OWNERSHIP            = 39,
    TAKE_TABLET_OWNERSHIP            = 40,
    GET_HEAD_OF_LOG                  = 42,
    INCREMENT                        = 43,
    PREP_FOR_MIGRATION               = 44,
    RECEIVE_MIGRATION_DATA           = 45,
    REASSIGN_TABLET_OWNERSHIP        = 46,
    MIGRATE_TABLET                   = 47,
    IS_REPLICA_NEEDED                = 48,
    SPLIT_TABLET                     = 49,
    GET_SERVER_STATISTICS            = 50,
    SET_RUNTIME_OPTION               = 51,
    GET_SERVER_CONFIG                = 52,
    GET_BACKUP_CONFIG                = 53,
    GET_MASTER_CONFIG                = 55,
    GET_LOG_METRICS                  = 56,
    VERIFY_MEMBERSHIP                = 57,
    GET_RUNTIME_OPTION               = 58,
    GET_LEASE_INFO                   = 59,
    RENEW_LEASE                      = 60,
    SERVER_CONTROL                   = 61,
    SERVER_CONTROL_ALL               = 62,
    GET_SERVER_ID                    = 63,
    READ_KEYS_AND_VALUE              = 64,
    LOOKUP_INDEX_KEYS                = 65,
    READ_HASHES                      = 66,
    INSERT_INDEX_ENTRY               = 67,
    REMOVE_INDEX_ENTRY               = 68,
    CREATE_INDEX                     = 69,
    DROP_INDEX                       = 70,
    DROP_INDEXLET_OWNERSHIP          = 71,
    TAKE_INDEXLET_OWNERSHIP          = 72,
    PREP_FOR_INDEXLET_MIGRATION      = 73,
    SPLIT_AND_MIGRATE_INDEXLET       = 74,
    COORD_SPLIT_AND_MIGRATE_INDEXLET = 75,
    TX_DECISION                      = 76,
    TX_PREPARE                       = 77,
    TX_REQUEST_ABORT                 = 78,
    TX_HINT_FAILED                   = 79,
    ECHO                             = 80,
    ILLEGAL_RPC_TYPE                 = 81,  // 1 + the highest legitimate Opcode

    // Local modification: the two values below are not operations. They are
    // stored in Common_Request::type to tell requests and responses apart.
    TYPE_REQUEST                     = 82,  // Request distinguisher
    TYPE_RESPONSE                    = 83,  // Response distinguisher
};
/**
 * Header that begins every RPC request.
 */
struct RequestCommon {
    uint16_t opcode;   /// Opcode of operation to be performed.
    uint16_t service;  /// ServiceType to invoke for this rpc.
} __attribute__((packed));
/**
 * Header that begins every RPC response.
 */
struct ResponseCommon {
    // Outcome of the operation: tells whether it succeeded and, if it did
    // not, why.
    Status status;
} __attribute__((packed));
/**
 * Wire format of the WRITE RPC, which is served by the master service.
 *
 * NOTE(review): the previous description of this struct mentioned an extra
 * `uint64_t timestamp` field for latency measurement and an #ifdef-based
 * compile-time packet size (default 100 bytes). Neither is present in the
 * definition below - confirm whether that description is stale.
 */
struct Write {
    static const Opcode opcode = WRITE;
    static const ServiceType service = MASTER_SERVICE;

    /// Request header; the keysAndValue blob follows it on the wire.
    struct Request {
        RequestCommon common;
        uint64_t tableId;
        ClientLease lease;
        uint64_t rpcId;
        uint64_t ackId;
        // Total size in bytes of the keysAndValue blob that immediately
        // follows this header.
        uint32_t length;
        RejectRules rejectRules;
        uint8_t async;
    } __attribute__((packed));

    /// Response header.
    struct Response {
        ResponseCommon common;
        uint64_t version;
    } __attribute__((packed));
};
/**
 * Wire format of the READ RPC, which is served by the master service.
 */
struct Read {
    static const Opcode opcode = READ;
    static const ServiceType service = MASTER_SERVICE;

    /// Request header; the key bytes follow it on the wire.
    struct Request {
        RequestCommon common;
        // Length of the key in bytes. The key itself comes immediately
        // after this header.
        uint16_t keyLength;
        RejectRules rejectRules;
    } __attribute__((packed));

    /// Response header; the object's value follows it on the wire.
    struct Response {
        ResponseCommon common;
        uint64_t version;
        // Length of the object's value in bytes. The value itself comes
        // immediately after this header.
        uint32_t length;
    } __attribute__((packed));
};
////// A common struct for request //////
union
request
{
struct
Write
::
Request
w_request
;
struct
Write
::
Response
w_response
;
struct
Read
::
Request
r_request
;
struct
Read
::
Response
r_response
;
}
__attribute__
((
packed
));
struct
Common_Request
{
uint8_t
opcode
;
uint8_t
service_type
;
uint8_t
type
;
chrono
::
time_point
<
chrono
::
steady_clock
>
request_start_time
;
union
request
req
;
//key and value pair immediately follow
}
__attribute__
((
packed
));
#endif
Smit_MTP_RamCloud_Replication_Offload/include/connection_pool.cc
0 → 100644
View file @
d2d47b86
#ifndef __CONNECTION_POOL_C__
#define __CONNECTION_POOL_C__
#include <arpa/inet.h>
#include <sys/ioctl.h>
#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>
#include "common.hpp"
#include "../config/read_config.hpp"
#include "connection_pool.hpp"
#include "../transport_api/transport_config.hpp"
// Builds an empty pool: no shared listening transport, no port, no
// descriptors registered for select(). transport_type is left unset here;
// the Transport_Type constructor assigns it.
Connection_Pool::Connection_Pool()
    : common_tcp_conn(NULL),
      common_port(-1),
      common_socket_local_fd(-1),
      max_fd(0) {
    FD_ZERO(&this->cset);
}
// Builds an empty pool (via the default constructor) and records which
// transport its connections will use.
Connection_Pool::Connection_Pool(enum Transport_Type t) : Connection_Pool() {
    transport_type = t;
}
// Connection_Pool::~Connection_Pool() {
// }
int
Connection_Pool
::
set_common_tcp_conn
(
int
port
)
{
if
(
this
->
common_tcp_conn
==
NULL
)
{
this
->
common_tcp_conn
=
new
TCP_Transport
(
port
);
}
this
->
common_port
=
port
;
if
(
this
->
common_tcp_conn
->
make_socket
())
{
//error
perror
(
"set_common_tcp_conn: Unable to create socket"
);
return
-
1
;
}
FD_SET
(
this
->
common_tcp_conn
->
get_local_fd
(),
&
(
this
->
cset
));
this
->
max_fd
=
std
::
max
(
this
->
max_fd
,
this
->
common_tcp_conn
->
get_local_fd
());
return
0
;
}
// Puts the shared TCP transport into listening mode.
// Returns 0 once start_listen() has been invoked, or -1 when the shared
// transport has not been created yet (set_common_tcp_conn() not called).
// The original fell off the end of a non-void function, which is undefined
// behaviour in C++; it also dereferenced common_tcp_conn unconditionally.
// NOTE(review): start_listen()'s own result (if it has one) is not visible
// from this file and is therefore not propagated - confirm.
int Connection_Pool::start_common_conn_listen() {
    if (this->common_tcp_conn == NULL) {
        //error
        perror("start_common_conn_listen: Common tcp obj not created");
        return -1;
    }
    this->common_tcp_conn->start_listen();
    return 0;
}
// Accepts one pending connection on the shared listening socket and adds it
// to the pool.
//
// parameters: RDMA tuning values copied into the new RDMA_Transport's
//             config; only read when the pool uses RDMA_RC_TRANSPORT.
// Returns 0 on success, -1 when the shared transport is missing, when RDMA
// parameters are required but NULL, or when accept() fails.
//
// Changes from the original: the TCP_Transport is allocated only after
// accept() succeeded (it used to leak on failure), and a NULL `parameters`
// is rejected up front instead of being dereferenced on the RDMA path.
int Connection_Pool::accept_conn(Params *parameters) {
    if (this->common_tcp_conn == NULL) {
        //error
        perror("accept_conn: Common tcp obj not created");
        return -1;
    }
    if (this->transport_type == RDMA_RC_TRANSPORT && parameters == NULL) {
        // Not an errno condition, so report it without perror().
        fprintf(stderr, "accept_conn: RDMA parameters not provided\n");
        return -1;
    }

    const int listen_fd = this->common_tcp_conn->get_local_fd();
    const int conn_fd = accept(listen_fd, NULL, NULL);
    if (conn_fd < 0) {
        //error
        perror("accept_conn: Unable to accept connection");
        return -1;
    }

    // Wrap the accepted descriptor; it shares the listening socket as its
    // "local" fd.
    TCP_Transport *tcp_transport = new TCP_Transport();
    tcp_transport->set_local_fd(listen_fd);
    tcp_transport->set_conn_fd(conn_fd);

    this->max_fd = std::max(this->max_fd, conn_fd);
    FD_SET(conn_fd, &(this->cset));

    RDMA_Transport *rdma_transport = NULL;
    switch (this->transport_type) {
    case TCP_IP_TRANSPORT:
        this->tcp_connection_pool.push_back(tcp_transport);
        this->tcp_transport_map[conn_fd] = tcp_transport;
        // The process-wide map is keyed by insertion index, not by fd.
        common::tcp_transport_map[common::tcp_transport_map.size()] = tcp_transport;
        break;
    case RDMA_RC_TRANSPORT:
        // The RDMA transport is built on top of the accepted TCP connection.
        rdma_transport = new RDMA_Transport(tcp_transport);
        rdma_transport->rdma_config->mr.mr_size = parameters->rdma_mr_size_bytes;
        rdma_transport->rdma_config->mtu = parameters->rdma_mtu_size_bytes;
        rdma_transport->rdma_config->min_rnr_timer = parameters->rdma_min_rnr_timer;
        rdma_transport->rdma_config->timeout = parameters->rdma_timeout;
        rdma_transport->rdma_config->retry_cnt = parameters->rdma_retry_cnt;
        rdma_transport->rdma_config->ib_port = parameters->rdma_ib_port;
        rdma_transport->rdma_config->gid_idx = parameters->rdma_gid_idx;
        // NOTE(review): the outcome of the RDMA setup is not checked here,
        // exactly as in the original - confirm whether it can fail.
        rdma_transport->rdma_setup_no_tcp_setup();
        this->rdma_connection_pool.push_back(rdma_transport);
        this->rdma_transport_map[conn_fd] = rdma_transport;
        common::rdma_transport_map[common::rdma_transport_map.size()] = rdma_transport;
        break;
    }
    return 0;
}
// Sets a common socket which can then accept
// multiple conns. Useful for servers
int
Connection_Pool
::
make_common_socket
(
int
port
)
{
this
->
common_port
=
port
;
if
(
port
<
0
)
{
// error
return
-
1
;
}
struct
sockaddr_in
host_addr
;
memset
(
&
host_addr
,
0
,
sizeof
(
host_addr
));
host_addr
.
sin_family
=
AF_INET
;
host_addr
.
sin_port
=
htons
(
this
->
common_port
);
host_addr
.
sin_addr
.
s_addr
=
htonl
(
INADDR_ANY
);
this
->
common_socket_local_fd
=
socket
(
host_addr
.
sin_family
,
SOCK_STREAM
,
0
);
if
(
this
->
common_socket_local_fd
<
0
)
{
//error
return
-
1
;
}
FD_SET
(
this
->
common_socket_local_fd
,
&
(
this
->
cset
));
return
0
;
}
// make a connection obj on the
// common socket formed
// Need to form a common socket first
// int Connection_Pool::make_connection_common_socket(std::string addr) {
// int conn_fd;
// TCP_Transport *tcp_transport = NULL;
// RDMA_Transport *rdma_transport = NULL;
// if(debug) {
// printf("Initiating connection on the common socket to: %s %d\n", addr.c_str(), this->common_port);
// }
// switch(this->transport_type) {
// case TCP_IP_TRANSPORT:
// tcp_transport = new TCP_Transport(addr, this->common_port);
// tcp_transport->set_local_fd(this->common_socket_local_fd);
// tcp_transport->accept_conn();
// conn_fd = tcp_transport->get_conn_fd();
// this->max_fd = std::max(this->max_fd, std::max(this->common_socket_local_fd, conn_fd));
// this->tcp_connection_pool.push_back(tcp_transport);
// FD_SET(conn_fd, &(this->cset));
// this->tcp_transport_map[conn_fd] = tcp_transport;
// break;
// case RDMA_RC_TRANSPORT:
// rdma_transport = new RDMA_Transport(addr, this->common_port);
// }
// }
int
Connection_Pool
::
make_connection
(
std
::
string
addr
,
int
port
)
{
int
local_fd
,
conn_fd
;
TCP_Transport
*
tcp_transport
=
NULL
;
RDMA_Transport
*
rdma_transport
=
NULL
;
if
(
debug
)
{
printf
(
"Initiating connection to: %s %d
\n
"
,
addr
.
c_str
(),
port
);
}
switch
(
this
->
transport_type
)
{
case
TCP_IP_TRANSPORT
:
tcp_transport
=
new
TCP_Transport
(
addr
,
port
);
//this->tcp_connection_pool.push_back(tcp_transport);
tcp_transport
->
setup
();
local_fd
=
tcp_transport
->
get_local_fd
();
conn_fd
=
tcp_transport
->
get_conn_fd
();
this
->
max_fd
=
std
::
max
(
this
->
max_fd
,
std
::
max
(
local_fd
,
conn_fd
));
this
->
tcp_connection_pool
.
push_back
(
tcp_transport
);
FD_SET
(
conn_fd
,
&
(
this
->
cset
));
if
(
conn_fd
!=
local_fd
)
{
FD_SET
(
local_fd
,
&
(
this
->
cset
));
}
this
->
tcp_transport_map
[
local_fd
]
=
tcp_transport
;
this
->
tcp_transport_map
[
conn_fd
]
=
tcp_transport
;
common
::
tcp_transport_map
[
common
::
tcp_transport_map
.
size
()]
=
tcp_transport
;
break
;
case
RDMA_RC_TRANSPORT
:
rdma_transport
=
new
RDMA_Transport
(
addr
,
port
);
//this->rdma_connection_pool.push_back(rdma_transport);
rdma_transport
->
rdma_setup
();
local_fd
=
rdma_transport
->
get_local_fd
();
conn_fd
=
rdma_transport
->
get_conn_fd
();
this
->
max_fd
=
std
::
max
(
this
->
max_fd
,
std
::
max
(
local_fd
,
conn_fd
));
this
->
rdma_connection_pool
.
push_back
(
rdma_transport
);
FD_SET
(
conn_fd
,
&
(
this
->
cset
));
if
(
conn_fd
!=
local_fd
)
{
FD_SET
(
local_fd
,
&
(
this
->
cset
));
}
this
->
rdma_transport_map
[
local_fd
]
=
rdma_transport
;
this
->
rdma_transport_map
[
conn_fd
]
=
rdma_transport
;
common
::
rdma_transport_map
[
common
::
rdma_transport_map
.
size
()]
=
rdma_transport
;
break
;
}
return
0
;
}
int
Connection_Pool
::
make_connection
(
std
::
string
addr
,
int
port
,
Params
params
)
{
int
local_fd
,
conn_fd
;
TCP_Transport
*
tcp_transport
=
NULL
;
RDMA_Transport
*
rdma_transport
=
NULL
;
if
(
debug
)
{
printf
(
"Initiating connection to: %s %d
\n
"
,
addr
.
c_str
(),
port
);
}
switch
(
this
->
transport_type
)
{
case
TCP_IP_TRANSPORT
:
tcp_transport
=
new
TCP_Transport
(
addr
,
port
);
tcp_transport
->
setup
();
local_fd
=
tcp_transport
->
get_local_fd
();
conn_fd
=
tcp_transport
->
get_conn_fd
();
this
->
max_fd
=
std
::
max
(
this
->
max_fd
,
std
::
max
(
local_fd
,
conn_fd
));
this
->
tcp_connection_pool
.
push_back
(
tcp_transport
);
FD_SET
(
conn_fd
,
&
(
this
->
cset
));
if
(
conn_fd
!=
local_fd
)
{
FD_SET
(
local_fd
,
&
(
this
->
cset
));
}
this
->
tcp_transport_map
[
local_fd
]
=
tcp_transport
;
this
->
tcp_transport_map
[
conn_fd
]
=
tcp_transport
;
common
::
tcp_transport_map
[
common
::
tcp_transport_map
.
size
()]
=
tcp_transport
;
break
;
case
RDMA_RC_TRANSPORT
:
rdma_transport
=
new
RDMA_Transport
(
addr
,
port
);
rdma_transport
->
rdma_config
->
mr
.
mr_size
=
params
.
rdma_mr_size_bytes
;
rdma_transport
->
rdma_config
->
mtu
=
params
.
rdma_mtu_size_bytes
;
rdma_transport
->
rdma_config
->
min_rnr_timer
=
params
.
rdma_min_rnr_timer
;
rdma_transport
->
rdma_config
->
timeout
=
params
.
rdma_timeout
;
rdma_transport
->
rdma_config
->
retry_cnt
=
params
.
rdma_retry_cnt
;
rdma_transport
->
rdma_config
->
ib_port
=
params
.
rdma_ib_port
;
rdma_transport
->
rdma_config
->
gid_idx
=
params
.
rdma_gid_idx
;
rdma_transport
->
rdma_setup
();
local_fd
=
rdma_transport
->
get_local_fd
();
conn_fd
=
rdma_transport
->
get_conn_fd
();
this
->
max_fd
=
std
::
max
(
this
->
max_fd
,
std
::
max
(
local_fd
,
conn_fd
));
this
->
rdma_connection_pool
.
push_back
(
rdma_transport
);
FD_SET
(
conn_fd
,
&
(
this
->
cset
));
if
(
conn_fd
!=
local_fd
)
{
FD_SET
(
local_fd
,
&
(
this
->
cset
));
}
this
->
rdma_transport_map
[
local_fd
]
=
rdma_transport
;
this
->
rdma_transport_map
[
conn_fd
]
=
rdma_transport
;
common
::
rdma_transport_map
[
common
::
rdma_transport_map
.
size
()]
=
rdma_transport
;
break
;
}
return
0
;
}
// Blocks in select() until at least one registered descriptor is readable
// and returns the TCP transports that own the readable descriptors.
//
// Returns an empty vector when select() fails (including EINTR).
//
// Changes from the original:
//  - the select() result is checked; after a failure the contents of the
//    fd set are unspecified and must not be scanned;
//  - descriptors are looked up with find() instead of operator[], so no
//    NULL entries are inserted into tcp_transport_map and no NULL pointers
//    are handed to the caller for descriptors that have no transport (for
//    example the shared listening socket registered by
//    set_common_tcp_conn()).
std::vector<TCP_Transport*> Connection_Pool::get_tcp_request_conns() {
    std::vector<TCP_Transport*> ret_transports;

    // select() overwrites its argument, so work on a copy of the master set.
    fd_set tset = this->cset;
    const int n = select(this->max_fd + 1, &tset, NULL, NULL, NULL);
    if (n <= 0) {
        if (n < 0) {
            perror("get_tcp_request_conns: select failed");
        }
        return ret_transports;
    }

    for (int fd = 0; fd <= this->max_fd; fd++) {
        if (!FD_ISSET(fd, &tset)) {
            continue;
        }
        auto entry = this->tcp_transport_map.find(fd);
        if (entry != this->tcp_transport_map.end() && entry->second != NULL) {
            ret_transports.push_back(entry->second);
        }
    }
    return ret_transports;
}
// Polls (zero timeout, never blocks) the registered descriptors and returns
// the RDMA transports whose control descriptor is readable.
//
// Returns an empty vector when nothing is ready or when select() fails.
//
// Changes from the original:
//  - the select() result is checked; after a failure the contents of the
//    fd set are unspecified and must not be scanned;
//  - descriptors are looked up with find() instead of operator[], so no
//    NULL entries are inserted into rdma_transport_map and no NULL pointers
//    are handed to the caller for descriptors that have no transport.
std::vector<RDMA_Transport*> Connection_Pool::get_rdma_request_conns() {
    std::vector<RDMA_Transport*> ret_transports;

    // A zeroed timeout makes select() check once and return immediately.
    struct timeval tv;
    tv.tv_sec = 0;
    tv.tv_usec = 0;

    // select() overwrites its argument, so work on a copy of the master set.
    fd_set tset = this->cset;
    const int n = select(this->max_fd + 1, &tset, NULL, NULL, &tv);
    if (n <= 0) {
        if (n < 0) {
            perror("get_rdma_request_conns: select failed");
        }
        return ret_transports;
    }

    for (int fd = 0; fd <= this->max_fd; fd++) {
        if (!FD_ISSET(fd, &tset)) {
            continue;
        }
        auto entry = this->rdma_transport_map.find(fd);
        if (entry != this->rdma_transport_map.end() && entry->second != NULL) {
            ret_transports.push_back(entry->second);
        }
    }
    return ret_transports;
}
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/connection_pool.hpp
0 → 100644
View file @
d2d47b86
#ifndef __CONNECTION_POOL_H__
#define __CONNECTION_POOL_H__
#include <vector>
#include <unordered_map>
#include "../config/read_config.hpp"
#include "../transport_api/transport_config.hpp"
// File-scope descriptor sets defined in some other translation unit.
// Note that Connection_Pool also has a member named `cset`; inside its
// member functions the member is the one that is found.
extern
fd_set
cset
,
rset
;
// extern std::vector<TCP_Transport*> tcp_connection_pool;
// extern std::vector<RDMA_Transport*> rdma_connection_pool;
// Owns every transport connection of one endpoint and the select() set used
// to find the connections that have pending input.
class Connection_Pool {
public:
    // Transport used by connections created through this pool.
    enum Transport_Type transport_type;

    // Shared listening transport; NULL until set_common_tcp_conn() runs.
    TCP_Transport *common_tcp_conn;
    // Port of the shared socket, -1 while unset.
    int common_port;
    // Descriptor created by make_common_socket(), -1 while unset.
    int common_socket_local_fd;

    // Master descriptor set handed (as a copy) to select().
    fd_set cset;

    // Transports in creation order.
    std::vector<TCP_Transport*> tcp_connection_pool;
    std::vector<RDMA_Transport*> rdma_connection_pool;

    // Descriptor -> transport lookup.
    std::unordered_map<int, TCP_Transport*> tcp_transport_map;
    std::unordered_map<int, RDMA_Transport*> rdma_transport_map;

    // Upper bound used as select()'s nfds - 1.
    int max_fd;

    Connection_Pool();
    Connection_Pool(enum Transport_Type t);
    // NOTE(review): no definition of this overload appears in
    // connection_pool.cc - confirm it is defined elsewhere before using it.
    Connection_Pool(enum Transport_Type t, int port);
    //~Connection_Pool();

    // Shared listening socket management (server side).
    int set_common_tcp_conn(int port);
    int start_common_conn_listen();
    int accept_conn(Params *parameters);
    int make_common_socket(int port);
    //int make_connection_common_socket(std::string addr);

    // Outgoing connections; the Params overload also applies RDMA tuning.
    int make_connection(std::string addr, int port);
    int make_connection(std::string addr, int port, Params params);

    // Transports whose descriptors are currently readable.
    std::vector<TCP_Transport*> get_tcp_request_conns();
    std::vector<RDMA_Transport*> get_rdma_request_conns();
};
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/dispatcher.cc
0 → 100644
View file @
d2d47b86
#ifndef __DISPATCHER_C__
#define __DISPATCHER_C__
#include <algorithm>
#include <chrono>
#include <vector>
#include "../config/read_config.hpp"
#include "common.hpp"
#include "connection_pool.hpp"
#include "client_functions.hpp"
#include "dispatcher.hpp"
#include "thread_functions.hpp"
#include "../transport_api/transport_config.hpp"
namespace
chrono
=
std
::
chrono
;
// Creates a dispatcher for transport `t` with its own connection pool and a
// pool of `num_threads` workers that run the default worker_function; the
// worker threads are started before the constructor returns.
Dispatcher::Dispatcher(enum Transport_Type t, int num_threads)
    : transport_type(t),
      conn_pool(new Connection_Pool(t)),
      thread_pool(new Thread_Pool(num_threads)) {
    thread_pool->set_function(worker_function);
    thread_pool->start_threads();
}
// Creates a dispatcher for transport `t` with its own connection pool and a
// pool of `num_threads` workers that run the caller-supplied `func`; the
// worker threads are started before the constructor returns.
Dispatcher::Dispatcher(enum Transport_Type t, int num_threads, void *(*func)(void *))
    : transport_type(t),
      conn_pool(new Connection_Pool(t)),
      thread_pool(new Thread_Pool(num_threads)) {
    thread_pool->set_function(func);
    thread_pool->start_threads();
}
// Dispatcher::~Dispatcher() {
// }
// Creates the shared listening transport on `port` through the connection
// pool. Returns the pool's result: 0 on success, -1 if the socket could not
// be created. The original discarded that result and fell off the end of a
// non-void function, which is undefined behaviour in C++.
int Dispatcher::setup_common_tcp_conn(int port) {
    return this->conn_pool->set_common_tcp_conn(port);
}
// Starts listening on the shared socket through the connection pool and
// returns the pool's int result. The original discarded that result and
// fell off the end of a non-void function, which is undefined behaviour
// in C++.
int Dispatcher::common_socket_start_listen() {
    return this->conn_pool->start_common_conn_listen();
}
// Accepts one connection on the pool's shared TCP transport.
// `parameters` is taken by value; the pool only sees a pointer to this
// local copy for the duration of the call. Returns accept_conn()'s result.
int Dispatcher::add_conn_on_common_tcp(Params parameters) {
    Params *local_copy = &parameters;
    const int rc = conn_pool->accept_conn(local_copy);
    return rc;
}
// Makes connection in its Connection_Pool
int
Dispatcher
::
add_connection
(
std
::
string
addr
,
int
port
)
{
return
this
->
conn_pool
->
make_connection
(
addr
,
port
);
}
int
Dispatcher
::
add_connection
(
std
::
string
addr
,
int
port
,
Params
params
)
{
return
this
->
conn_pool
->
make_connection
(
addr
,
port
,
params
);
}
std
::
vector
<
job_context
*>
Dispatcher
::
get_jobs
()
{
std
::
vector
<
TCP_Transport
*>
tcp_transports
;
std
::
vector
<
RDMA_Transport
*>
rdma_transports
;
ssize_t
n
;
std
::
vector
<
job_context
*>
active_jobs
;
char
*
buf
=
NULL
;
job_context
*
job
;
Common_Request
*
cr
;
char
*
tbuf
=
NULL
;
TCP_Transport
*
temp_tcp
;
switch
(
this
->
transport_type
)
{
case
TCP_IP_TRANSPORT
:
tcp_transports
=
this
->
conn_pool
->
get_tcp_request_conns
();
for
(
TCP_Transport
*
transport
:
tcp_transports
)
{
n
=
transport
->
recv_data
(
&
buf
);
cr
=
(
Common_Request
*
)
buf
;
job
=
new
job_context
(
transport
,
cr
->
opcode
);
job
->
service_type
=
cr
->
service_type
;
job
->
job_type
=
cr
->
type
;
job
->
request_packet
=
buf
;
job
->
request
=
(
Common_Request
*
)
buf
;
job
->
transport_type
=
TCP_IP_TRANSPORT
;
active_jobs
.
push_back
(
job
);
}
break
;
case
RDMA_RC_TRANSPORT
:
rdma_transports
=
this
->
conn_pool
->
get_rdma_request_conns
();
for
(
RDMA_Transport
*
transport
:
rdma_transports
)
{
temp_tcp
=
transport
->
get_tcp_conn
();
if
(
tbuf
!=
NULL
)
{
free
(
tbuf
);
}
tbuf
=
NULL
;
temp_tcp
->
recv_data
(
&
tbuf
);
cr
=
(
Common_Request
*
)
tbuf
;
switch
(
cr
->
type
)
{
case
TYPE_REQUEST
:
switch
(
transport_type
)
{
case
TCP_IP_TRANSPORT
:
//pass for now
break
;
case
RDMA_RC_TRANSPORT
:
if
(
transport
->
one_sided_read
())
{
//error
perror
(
"Unable to read rdma data"
);
continue
;
}
cr
=
(
Common_Request
*
)
transport
->
get_mr_addr
();
job
=
new
job_context
(
transport
,
cr
->
opcode
);
job
->
service_type
=
cr
->
service_type
;
job
->
job_type
=
cr
->
type
;
job
->
request_packet
=
transport
->
get_mr_addr
();
job
->
request
=
cr
;
job
->
transport_type
=
RDMA_RC_TRANSPORT
;
active_jobs
.
push_back
(
job
);
if
(
tbuf
!=
NULL
)
{
free
(
tbuf
);
tbuf
=
NULL
;
}
break
;
}
break
;
case
TYPE_RESPONSE
:
switch
(
transport_type
)
{
case
TCP_IP_TRANSPORT
:
//pass for now
break
;
case
RDMA_RC_TRANSPORT
:
cr
=
(
Common_Request
*
)
tbuf
;
job
=
new
job_context
(
transport
,
cr
->
opcode
);
job
->
service_type
=
cr
->
service_type
;
job
->
job_type
=
cr
->
type
;
job
->
request_packet
=
tbuf
;
job
->
request
=
cr
;
job
->
transport_type
=
RDMA_RC_TRANSPORT
;
active_jobs
.
push_back
(
job
);
tbuf
=
NULL
;
break
;
}
break
;
}
}
break
;
}
return
active_jobs
;
}
// Routes each job to the structure that consumes it.
//  - TYPE_REQUEST jobs are appended to request_queue (stamped with the
//    current time first when `analyze` is on).
//  - TYPE_RESPONSE jobs are offered to response_buffer, keyed by the
//    connection fd of the job's transport. The offer is repeated until
//    add_element() returns 0 or response_buffer_add_timeout has elapsed
//    since this job was picked up.
// Jobs of any other type are ignored.
// NOTE(review): a response that still cannot be added when the timeout
// expires is silently dropped here - confirm that is intended. Only the
// RDMA path stamps job_post_time when `analyze` is on.
void Dispatcher::assign_jobs(std::vector<job_context*> jobs) {
    for (job_context *job : jobs) {
        // job_queue->enqueue(job);
        const auto picked_up = chrono::steady_clock::now();

        if (job->job_type == TYPE_REQUEST) {
            if (analyze) {
                job->job_post_time = chrono::steady_clock::now();
            }
            request_queue->enqueue(job);
        } else if (job->job_type == TYPE_RESPONSE) {
            if (transport_type == TCP_IP_TRANSPORT) {
                for (;;) {
                    int rc = response_buffer->add_element(
                        job->tcp_transport->get_conn_fd(), job, job->request);
                    if (rc == 0) {
                        break;
                    }
                    const auto now = chrono::steady_clock::now();
                    if (!((now - picked_up) < response_buffer_add_timeout)) {
                        break;
                    }
                }
            } else if (transport_type == RDMA_RC_TRANSPORT) {
                for (;;) {
                    if (analyze) {
                        job->job_post_time = chrono::steady_clock::now();
                    }
                    int rc = response_buffer->add_element(
                        job->rdma_transport->get_conn_fd(), job, job->request);
                    if (rc == 0) {
                        break;
                    }
                    const auto now = chrono::steady_clock::now();
                    if (!((now - picked_up) < response_buffer_add_timeout)) {
                        break;
                    }
                }
            }
        }
    }
}
// Sends every job that was in send_queue when the call started, then
// deletes the job.
//
// All messages go out over TCP: with the RDMA transport only one-sided
// reads are used, and those are always preceded by a TCP message
// (NIC: prepare requests, server: (n)acks, client: requests). So the RDMA
// case simply uses the transport's TCP side channel.
//
// Changes from the original:
//  - get_job() returns NULL when the queue is empty or held a NULL entry;
//    such slots are skipped instead of being dereferenced;
//  - blob_size defaults to the bare header size, so a job whose type is
//    neither TYPE_REQUEST nor TYPE_RESPONSE no longer sends an
//    uninitialized length;
//  - a job with no usable transport is dropped instead of going through an
//    uninitialized pointer.
// NOTE(review): for requests the payload length is always read through
// req.w_request.length, also for read requests - confirm the senders fill
// that field for every request kind.
void Dispatcher::service_send_queue() {
    // Snapshot of the length: jobs enqueued while we run wait for the
    // next call.
    const size_t send_queue_size = send_queue->q.size();

    for (size_t i = 0; i < send_queue_size; i++) {
        job_context *job = send_queue->get_job();
        if (job == NULL) {
            continue;
        }

        // Header only, plus the trailing key/value blob for requests.
        size_t blob_size = sizeof(Common_Request);
        if (job->job_type == TYPE_REQUEST) {
            blob_size += job->request->req.w_request.length;
        }

        TCP_Transport *tcp_transport = NULL;
        switch (transport_type) {
        case TCP_IP_TRANSPORT:
            tcp_transport = job->tcp_transport;
            break;
        case RDMA_RC_TRANSPORT:
            tcp_transport = job->rdma_transport->get_tcp_conn();
            break;
        }
        if (tcp_transport == NULL) {
            delete job;
            continue;
        }

        // Lend the packet to the transport for the duration of the send.
        tcp_transport->mr = job->request_packet;
        tcp_transport->mr_size = blob_size;
        tcp_transport->send_data();
        if (analyze) {
            send_queue_wait_time.push_back(
                chrono::steady_clock::now() - job->job_post_time);
        }
        tcp_transport->mr = NULL;
        tcp_transport->mr_size = 0;

        // free(job->request_packet);
        delete job;
    }
}
// Executes one tokenized client command on the first pooled connection.
//
// Supported commands (the command word is case-insensitive):
//   get <key>          - issues read_rpc
//   put <key> <value>  - issues write_rpc
//   exit <anything>    - asks the caller to terminate
//
// Returns 0 for an empty token list or a completed get/put, 1 for exit,
// -1 for malformed or unknown commands, a failed rpc, or when no
// connection has been added yet.
//
// Changes from the original:
//  - the TCP "get" debug line said "Issuing PUT Request (RDMA)"; it now
//    names the operation and transport actually used, and the TCP "put"
//    path prints a matching debug line;
//  - the pools are checked before indexing element 0 (it was undefined
//    behaviour when no connection existed).
// NOTE(review): a lone "exit" token is rejected by the size()==1 check
// below before the exit branch is reached - confirm that is intended.
int Dispatcher::issue_cmd(std::vector<std::string> tokens) {
    if (tokens.empty()) {
        return 0;   // empty command
    }
    if (tokens.size() == 1) {
        // invalid command str
        return -1;
    }

    std::string cmd = tokens[0];
    std::transform(cmd.begin(), cmd.end(), cmd.begin(),
                   [](unsigned char c) { return std::tolower(c); });

    const bool is_get = (cmd.compare("get") == 0);
    const bool is_put = (cmd.compare("put") == 0);

    if (is_get || is_put) {
        // Both commands talk to the first connection of the active pool.
        if (this->transport_type == RDMA_RC_TRANSPORT &&
            this->conn_pool->rdma_connection_pool.empty()) {
            fprintf(stderr, "issue_cmd: no rdma connection available\n");
            return -1;
        }
        if (this->transport_type == TCP_IP_TRANSPORT &&
            this->conn_pool->tcp_connection_pool.empty()) {
            fprintf(stderr, "issue_cmd: no tcp connection available\n");
            return -1;
        }
    }

    if (is_get) {
        //sanity check
        if (tokens.size() != 2) {
            // invalid get format
            return -1;
        }
        const std::string key = tokens[1];

        if (this->transport_type == RDMA_RC_TRANSPORT) {
            if (debug) {
                printf("Issuing GET Request (RDMA)\n");
            }
            if (read_rpc((this->conn_pool->rdma_connection_pool[0]),
                         key.c_str(), key.size())) {
                //error
                perror("issue_cmd: read_rpc rdma fail");
                return -1;
            }
        } else if (this->transport_type == TCP_IP_TRANSPORT) {
            if (debug) {
                printf("Issuing GET Request (TCP)\n");
            }
            if (read_rpc((this->conn_pool->tcp_connection_pool[0]),
                         key, key.size())) {
                perror("issue_cmd: read_rpc tcp fail");
                return -1;
            }
        }
    } else if (is_put) {
        // only accept cmd key value format for put command
        if (tokens.size() != 3) {
            // invalid put format
            return -1;
        }
        const std::string key = tokens[1];
        const std::string value = tokens[2];

        if (this->transport_type == RDMA_RC_TRANSPORT) {
            if (debug) {
                printf("Issuing PUT request (RDMA)\n");
            }
            if (write_rpc(this->conn_pool->rdma_connection_pool[0],
                          key.c_str(), key.size(),
                          value.c_str(), value.size())) {
                //error
                return -1;
            }
        } else if (this->transport_type == TCP_IP_TRANSPORT) {
            if (debug) {
                printf("Issuing PUT request (TCP)\n");
            }
            if (write_rpc(this->conn_pool->tcp_connection_pool[0],
                          key.c_str(), key.size(),
                          value.c_str(), value.size())) {
                //error
                return -1;
            }
        }
    } else if (cmd.compare("exit") == 0) {
        return 1;
    } else {
        //error
        return -1;
    }
    return 0;
}
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/dispatcher.hpp
0 → 100644
View file @
d2d47b86
#ifndef __DISPATCHER_H__
#define __DISPATCHER_H__
#include <string>
#include <vector>
#include "../config/read_config.hpp"
#include "connection_pool.hpp"
#include "thread_pool.hpp"
#include "../transport_api/transport_config.hpp"
// Front end of an endpoint: owns the connection pool and the worker thread
// pool, turns incoming messages into jobs, hands jobs to the workers and
// sends queued outgoing messages.
class Dispatcher {
public:
    // Transport used by this dispatcher's connections.
    enum Transport_Type transport_type;
    Connection_Pool *conn_pool;
    Thread_Pool *thread_pool;

    // NOTE(review): no definition of this method appears in dispatcher.cc -
    // confirm it is defined elsewhere before calling it.
    int get_num_active_requests();

    // Both constructors create the pools and start the worker threads; the
    // second one lets the caller supply the thread entry point.
    Dispatcher(enum Transport_Type t, int num_threads);
    Dispatcher(enum Transport_Type t, int num_threads, void *(*func)(void *));
    //~Dispatcher();

    // Shared listening socket (server side).
    int setup_common_tcp_conn(int port);
    int common_socket_start_listen();
    int add_conn_on_common_tcp(Params parameters);

    // Outgoing connections.
    int add_connection(std::string addr, int port);
    int add_connection(std::string addr, int port, Params params);

    // Receive path: collect ready messages, then route them.
    std::vector<job_context*> get_jobs();
    void assign_jobs(std::vector<job_context*> jobs);

    // Runs a tokenized get/put/exit command.
    int issue_cmd(std::vector<std::string> tokens);

    // Send path: transmit everything currently queued for sending.
    void service_send_queue();
};
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/general_threadsafe_queue.hpp
0 → 100644
View file @
d2d47b86
#ifndef __GENERAL_THREADSAFE_QUEUE_H__
#define __GENERAL_THREADSAFE_QUEUE_H__
#include <queue>
#include <pthread.h>
#include "queue_context.hpp"
// FIFO queue guarded by a pthread mutex.
//
// T must be a pointer type: get_job() reports "nothing available" as NULL
// and the destructor deletes whatever is still queued, i.e. the queue owns
// the objects it holds at destruction time.
//
// Changes from the original:
//  - the destructor declared `T *t` and assigned a T to it, which does not
//    compile once the destructor is instantiated; it now uses a T;
//  - the destructor no longer unlocks a mutex it never locked, and drains
//    the queue before destroying the mutex;
//  - queue_size is initialized (it was left indeterminate);
//  - front() and dequeue() on an empty queue no longer invoke undefined
//    behaviour: front() returns a value-initialized T (NULL for pointers)
//    and dequeue() does nothing.
// NOTE(review): queue_size is never updated by any member; use q.size()
// for the current length. The class is copyable although it holds a raw
// mutex and owning pointers - do not copy instances.
template <typename T>
class ThreadSafe_Queue {
public:
    std::queue<T> q;             // underlying storage; lock before touching
    pthread_mutex_t queue_lock;  // guards q
    size_t queue_size;           // unused by this class, kept for callers

    ThreadSafe_Queue();
    ThreadSafe_Queue(size_t n, T jobs[]);
    ~ThreadSafe_Queue();

    void enqueue(T job);
    void dequeue();
    T front();
    T get_job();
};

// Creates an empty queue. Terminates the process if the mutex cannot be
// initialized.
template <typename T>
ThreadSafe_Queue<T>::ThreadSafe_Queue() : queue_size(0) {
    if (pthread_mutex_init(&(this->queue_lock), NULL)) {
        perror("Mutex creation error");
        exit(-1);
    }
}

// Creates a queue pre-filled with jobs[0..n-1], in that order.
template <typename T>
ThreadSafe_Queue<T>::ThreadSafe_Queue(size_t n, T jobs[]) : ThreadSafe_Queue() {
    for (size_t i = 0; i < n; i++) {
        this->q.push(jobs[i]);
    }
}

// Deletes every element still queued, then destroys the mutex. No other
// thread may use the queue once destruction has started.
template <typename T>
ThreadSafe_Queue<T>::~ThreadSafe_Queue() {
    while (!(this->q.empty())) {
        T t = this->q.front();
        this->q.pop();
        delete t;
    }
    pthread_mutex_destroy(&(this->queue_lock));
}

// Appends `job` to the back of the queue.
template <typename T>
void ThreadSafe_Queue<T>::enqueue(T job) {
    pthread_mutex_lock(&(this->queue_lock));
    this->q.push(job);
    pthread_mutex_unlock(&(this->queue_lock));
}

// Discards the front element, if there is one.
template <typename T>
void ThreadSafe_Queue<T>::dequeue() {
    pthread_mutex_lock(&(this->queue_lock));
    if (!this->q.empty()) {
        this->q.pop();
    }
    pthread_mutex_unlock(&(this->queue_lock));
}

// Returns the front element without removing it, or a value-initialized T
// (NULL for pointer types) when the queue is empty.
template <typename T>
T ThreadSafe_Queue<T>::front() {
    T t = T();
    pthread_mutex_lock(&(this->queue_lock));
    if (!this->q.empty()) {
        t = this->q.front();
    }
    pthread_mutex_unlock(&(this->queue_lock));
    return t;
}

// Removes and returns the front element in one locked step. Returns NULL
// when the queue is empty; a queued NULL entry is also removed and
// returned as NULL.
template <typename T>
T ThreadSafe_Queue<T>::get_job() {
    T t = NULL;
    pthread_mutex_lock(&(this->queue_lock));
    if (!this->q.empty()) {
        t = this->q.front();
        this->q.pop();
    }
    pthread_mutex_unlock(&(this->queue_lock));
    return t;
}
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/hash.cc
0 → 100644
View file @
d2d47b86
#ifndef __HASH_C__
#define __HASH_C__
#include <stdint.h>
#include "hash.hpp"
// Maps `key` to a bucket index in [0, n) by reducing it modulo n.
//
// key: value to hash.
// n:   number of buckets.
// Returns key % n, or 0 when n is not positive. The original divided by
// zero for n == 0 (undefined behaviour) and, for negative n, converted n to
// a huge unsigned modulus so the result was not a valid bucket index.
uint32_t hash(uint64_t key, int n) {
    if (n <= 0) {
        return 0;
    }
    const uint64_t buckets = (uint64_t) n;
    return (uint32_t) (key % buckets);
}
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/hash.hpp
0 → 100644
View file @
d2d47b86
#ifndef __HASH_H__
#define __HASH_H__
#include <stdint.h>
// Reduces `key` modulo `n` (the bucket count) and returns the result as a
// 32-bit value; see hash.cc for the definition.
uint32_t
hash
(
uint64_t
key
,
int
n
);
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/monitor.cc
0 → 100644
View file @
d2d47b86
#ifndef __MONITOR_CC__
#define __MONITOR_CC__
#include <chrono>
#include <iostream>
#include <pthread.h>
#include <stdio.h>
#include <vector>
#include "monitor.hpp"
namespace
chrono
=
std
::
chrono
;
// Builds a monitor with every counter, latency accumulator and throughput
// figure set to zero, and initialises one mutex per counter/accumulator with
// default attributes.
//
// The latency accumulators are zeroed explicitly: the default constructor of
// chrono::duration<double, ...> leaves the stored value indeterminate, and the
// incr_sum_* methods add onto whatever is there.
Monitor::Monitor()
    : num_requests(0),
      num_read_requests(0),
      num_write_requests(0),
      num_succ_requests(0),
      num_err_requests(0),
      num_dropped_requests(0),
      num_read_succ_requests(0),
      num_read_err_requests(0),
      num_write_succ_requests(0),
      num_write_err_requests(0),
      sum_read_latency(0.0),
      sum_write_latency(0.0),
      sum_replicate_latency(0.0),
      sum_data_transfer_latency(0.0),
      total_time_taken(0.0),
      request_throughput(0.0),
      read_throughput(0.0),
      write_throughput(0.0)
{
    pthread_mutex_t *const locks[] = {
        &(this->num_requests_lock),
        &(this->num_read_lock),
        &(this->num_write_lock),
        &(this->num_succ_lock),
        &(this->num_err_lock),
        &(this->num_dropped_lock),
        &(this->num_read_succ_lock),
        &(this->num_read_err_lock),
        &(this->num_write_succ_lock),
        &(this->num_write_err_lock),
        &(this->sum_read_lat_lock),
        &(this->sum_write_lat_lock),
        &(this->sum_replicate_lat_lock),
        &(this->sum_data_transfer_lat_lock),
        &(this->sum_total_time_lock),
    };

    for (size_t i = 0; i < sizeof(locks) / sizeof(locks[0]); ++i)
    {
        pthread_mutex_init(locks[i], NULL);
    }
}
long
long
Monitor
::
get_num_requests
()
{
return
this
->
num_requests
;
}
long
long
Monitor
::
get_num_read_requests
()
{
return
this
->
num_read_requests
;
}
long
long
Monitor
::
get_num_write_requests
()
{
return
this
->
num_write_requests
;
}
long
long
Monitor
::
get_num_succ_requests
()
{
return
this
->
num_succ_requests
;
}
long
long
Monitor
::
get_num_err_requests
()
{
return
this
->
num_err_requests
;
}
long
long
Monitor
::
get_num_dropped_requests
()
{
return
this
->
num_dropped_requests
;
}
long
long
Monitor
::
get_num_read_succ_requests
()
{
return
this
->
num_read_succ_requests
;
}
long
long
Monitor
::
get_num_read_err_requests
()
{
return
this
->
num_read_err_requests
;
}
long
long
Monitor
::
get_num_write_succ_requests
()
{
return
this
->
num_write_succ_requests
;
}
long
long
Monitor
::
get_num_write_err_requests
()
{
return
this
->
num_write_err_requests
;
}
void
Monitor
::
incr_num_requests
(
int
i
)
{
pthread_mutex_lock
(
&
(
this
->
num_requests_lock
));
this
->
num_requests
++
;
pthread_mutex_unlock
(
&
(
this
->
num_requests_lock
));
}
void
Monitor
::
incr_num_read_requests
(
int
i
)
{
pthread_mutex_lock
(
&
(
this
->
num_read_lock
));
this
->
num_read_requests
++
;
pthread_mutex_unlock
(
&
(
this
->
num_read_lock
));
}
void
Monitor
::
incr_num_write_requests
(
int
i
)
{
pthread_mutex_lock
(
&
(
this
->
num_write_lock
));
this
->
num_write_requests
++
;
pthread_mutex_unlock
(
&
(
this
->
num_write_lock
));
}
void
Monitor
::
incr_num_succ_requests
(
int
i
)
{
pthread_mutex_lock
(
&
(
this
->
num_succ_lock
));
this
->
num_succ_requests
++
;
pthread_mutex_unlock
(
&
(
this
->
num_succ_lock
));
}
void
Monitor
::
incr_num_err_requests
(
int
i
)
{
pthread_mutex_lock
(
&
(
this
->
num_err_lock
));
this
->
num_err_requests
++
;
pthread_mutex_unlock
(
&
(
this
->
num_err_lock
));
}
void
Monitor
::
incr_num_dropped_requests
(
int
i
)
{
pthread_mutex_lock
(
&
(
this
->
num_dropped_lock
));
this
->
num_dropped_requests
++
;
pthread_mutex_unlock
(
&
(
this
->
num_dropped_lock
));
}
void
Monitor
::
incr_num_read_succ_requests
(
int
i
)
{
pthread_mutex_lock
(
&
(
this
->
num_read_succ_lock
));
this
->
num_read_succ_requests
++
;
pthread_mutex_unlock
(
&
(
this
->
num_read_succ_lock
));
}
void
Monitor
::
incr_num_read_err_requests
(
int
i
)
{
pthread_mutex_lock
(
&
(
this
->
num_read_err_lock
));
this
->
num_read_err_requests
++
;
pthread_mutex_unlock
(
&
(
this
->
num_read_err_lock
));
}
void
Monitor
::
incr_num_write_succ_requests
(
int
i
)
{
pthread_mutex_lock
(
&
(
this
->
num_write_succ_lock
));
this
->
num_write_succ_requests
++
;
pthread_mutex_unlock
(
&
(
this
->
num_write_succ_lock
));
}
void
Monitor
::
incr_num_write_err_requests
(
int
i
)
{
pthread_mutex_lock
(
&
(
this
->
num_write_err_lock
));
this
->
num_write_err_requests
++
;
pthread_mutex_unlock
(
&
(
this
->
num_write_err_lock
));
}
// Accumulates `lat` (milliseconds, as a double) into the running read-latency
// sum under sum_read_lat_lock.
void Monitor::incr_sum_read_latency(chrono::duration<double, std::milli> lat)
{
    pthread_mutex_t *const guard = &sum_read_lat_lock;

    pthread_mutex_lock(guard);
    sum_read_latency += lat;
    pthread_mutex_unlock(guard);
}
// Accumulates `lat` (milliseconds, as a double) into the running write-latency
// sum under sum_write_lat_lock.
void Monitor::incr_sum_write_latency(chrono::duration<double, std::milli> lat)
{
    pthread_mutex_t *const guard = &sum_write_lat_lock;

    pthread_mutex_lock(guard);
    sum_write_latency += lat;
    pthread_mutex_unlock(guard);
}
// Accumulates `lat` (milliseconds, as a double) into the running
// replication-latency sum under sum_replicate_lat_lock.
void Monitor::incr_sum_replicate_latency(chrono::duration<double, std::milli> lat)
{
    pthread_mutex_t *const guard = &sum_replicate_lat_lock;

    pthread_mutex_lock(guard);
    sum_replicate_latency += lat;
    pthread_mutex_unlock(guard);
}
// Accumulates `lat` (milliseconds, as a double) into the running
// data-transfer-latency sum under sum_data_transfer_lat_lock.
void Monitor::incr_sum_data_transfer_latency(chrono::duration<double, std::milli> lat)
{
    pthread_mutex_t *const guard = &sum_data_transfer_lat_lock;

    pthread_mutex_lock(guard);
    sum_data_transfer_latency += lat;
    pthread_mutex_unlock(guard);
}
// Accumulates `lat` (milliseconds, as a double) into total_time_taken under
// sum_total_time_lock.
void Monitor::incr_sum_total_time_taken(chrono::duration<double, std::milli> lat)
{
    pthread_mutex_t *const guard = &sum_total_time_lock;

    pthread_mutex_lock(guard);
    total_time_taken += lat;
    pthread_mutex_unlock(guard);
}
// Returns total_time_taken (in milliseconds) divided by the number of
// requests, or 0.0 when no request has been counted.
// NOTE(review): time / count is milliseconds per request, which reads as an
// average latency rather than a throughput. Confirm the intended metric.
double Monitor::get_request_throughput()
{
    return (this->num_requests != 0)
               ? (double)(this->total_time_taken.count() / this->num_requests)
               : 0.0;
}
// Returns sum_read_latency (in milliseconds) divided by the number of read
// requests, or 0.0 when no read has been counted.
// NOTE(review): this is milliseconds per read, not reads per unit time.
// Confirm the intended metric.
double Monitor::get_read_throughput()
{
    return (this->num_read_requests != 0)
               ? (double)(this->sum_read_latency.count() / this->num_read_requests)
               : 0.0;
}
// Returns sum_write_latency (in milliseconds) divided by the number of write
// requests, or 0.0 when no write has been counted.
// NOTE(review): this is milliseconds per write, not writes per unit time.
// Confirm the intended metric.
double Monitor::get_write_throughput()
{
    return (this->num_write_requests != 0)
               ? (double)(this->sum_write_latency.count() / this->num_write_requests)
               : 0.0;
}
// Collects the three throughput figures into one vector.
// Element order: [0] read, [1] write, [2] total (all requests).
std::vector<double> Monitor::get_all_throughput()
{
    const double read_figure = this->get_read_throughput();
    const double write_figure = this->get_write_throughput();
    const double total_figure = this->get_request_throughput();

    std::vector<double> tput_vect;
    tput_vect.push_back(read_figure);
    tput_vect.push_back(write_figure);
    tput_vect.push_back(total_figure);
    return tput_vect;
}
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/monitor.hpp
0 → 100644
View file @
d2d47b86
#ifndef __MONITOR_H__
#define __MONITOR_H__
#include <chrono>
#include <pthread.h>
#include <vector>
namespace
chrono
=
std
::
chrono
;
// Collects request counters and latency sums for a running service.
// Each counter / accumulator has its own pthread mutex that the matching
// incr_* method takes while updating it; the get_* methods read the values
// directly.
class Monitor
{
private:
    // One mutex per counter, taken by the corresponding incr_num_* method.
    pthread_mutex_t num_requests_lock;
    pthread_mutex_t num_read_lock;
    pthread_mutex_t num_write_lock;
    pthread_mutex_t num_succ_lock;
    pthread_mutex_t num_err_lock;
    pthread_mutex_t num_dropped_lock;
    pthread_mutex_t num_read_succ_lock;
    pthread_mutex_t num_read_err_lock;
    pthread_mutex_t num_write_succ_lock;
    pthread_mutex_t num_write_err_lock;
    // One mutex per latency accumulator, taken by the corresponding incr_sum_* method.
    pthread_mutex_t sum_read_lat_lock;
    pthread_mutex_t sum_write_lat_lock;
    pthread_mutex_t sum_replicate_lat_lock;
    pthread_mutex_t sum_data_transfer_lat_lock;
    pthread_mutex_t sum_total_time_lock;
protected:
    // Request counters.
    long long num_requests;
    long long num_read_requests;
    long long num_write_requests;
    long long num_succ_requests;
    long long num_err_requests;
    long long num_dropped_requests;
    long long num_read_succ_requests;
    long long num_read_err_requests;
    long long num_write_succ_requests;
    long long num_write_err_requests;
    // Running sums of durations, stored as double-precision milliseconds.
    chrono::duration<double, std::milli> sum_read_latency;
    chrono::duration<double, std::milli> sum_write_latency;
    chrono::duration<double, std::milli> sum_replicate_latency;
    chrono::duration<double, std::milli> sum_data_transfer_latency;
    chrono::duration<double, std::milli> total_time_taken;
    // Set to 0.0 by the constructor.
    double request_throughput;
    double read_throughput;
    double write_throughput;
public:
    Monitor();
    // Counter accessors.
    long long get_num_requests();
    long long get_num_read_requests();
    long long get_num_write_requests();
    long long get_num_succ_requests();
    long long get_num_err_requests();
    long long get_num_dropped_requests();
    long long get_num_read_succ_requests();
    long long get_num_read_err_requests();
    long long get_num_write_succ_requests();
    long long get_num_write_err_requests();
    // Counter increments; each takes the matching mutex.
    void incr_num_requests(int i);
    void incr_num_read_requests(int i);
    void incr_num_write_requests(int i);
    void incr_num_succ_requests(int i);
    void incr_num_err_requests(int i);
    void incr_num_dropped_requests(int i);
    void incr_num_read_succ_requests(int i);
    void incr_num_read_err_requests(int i);
    void incr_num_write_succ_requests(int i);
    void incr_num_write_err_requests(int i);
    // Latency accumulation; each adds `lat` to the matching sum under its mutex.
    void incr_sum_read_latency(chrono::duration<double, std::milli> lat);
    void incr_sum_write_latency(chrono::duration<double, std::milli> lat);
    void incr_sum_replicate_latency(chrono::duration<double, std::milli> lat);
    void incr_sum_data_transfer_latency(chrono::duration<double, std::milli> lat);
    void incr_sum_total_time_taken(chrono::duration<double, std::milli> lat);
    // Derived figures; get_all_throughput returns {read, write, total}.
    double get_request_throughput();
    double get_read_throughput();
    double get_write_throughput();
    std::vector<double> get_all_throughput();
};
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/queue_context.hpp
0 → 100644
View file @
d2d47b86
#ifndef __QUEUE_CONTEXT_H__
#define __QUEUE_CONTEXT_H__
#include <chrono>
#include "common.hpp"
#include "../transport_api/transport_config.hpp"
namespace
chrono
=
std
::
chrono
;
enum
job_opcode
{
TCP_READ
,
TCP_WRITE
,
RDMA_RC_READ
,
RDMA_RC_WRITE
,
RDMA_RC_TWO_SIDED_SEND
,
RDMA_RC_TWO_SIDED_RECV
,
TCP_SERVER_READ_REQUEST
,
TCP_SERVER_REPLICATION_REQUEST
,
TCP_NIC_READ_REQUEST
,
TCP_NIC_WRITE_REQUEST
,
RDMA_RC_SERVER_READ_REQUEST
,
RDMA_RC_SERVER_REPLICATION_REQUEST
,
RDMA_RC_NIC_READ_REQUEST
,
RDMA_RC_NIC_WRITE_REQUEST
//need to write for RDMA_UC
};
// One unit of work passed between threads through the job queues.
//
// A job references the connection it belongs to (exactly one of
// tcp_transport / rdma_transport is set by the transport-taking
// constructors), the raw request buffer and its header view, plus a few
// one-byte tags describing how it should be handled.
//
// Every member has a defined initial value: pointers start as NULL and the
// one-byte tags start as 0. No constructor sets transport_type, service_type
// or request_packet to a meaningful value, so the code that creates the job
// must assign them before a worker dispatches on them.
//
// The struct does not own any of the pointers it holds; the destructor only
// clears them and frees nothing.
struct job_context
{
    TCP_Transport *tcp_transport = NULL;
    RDMA_Transport *rdma_transport = NULL;
    //enum job_opcode opcode;
    uint8_t transport_type = 0;
    uint8_t opcode = 0;
    uint8_t service_type = 0;
    uint8_t job_type = 0;
    char *request_packet = NULL;
    struct Common_Request *request = NULL;
    // Time at which the job was created / posted; used for wait-time statistics.
    chrono::time_point<chrono::steady_clock> job_post_time;

    // Empty job: all pointers NULL, all tags 0, default-constructed post time.
    job_context()
    {
    }

    // Job bound to a TCP connection, stamped with the current time.
    job_context(TCP_Transport *transport, uint8_t opcode)
        : tcp_transport(transport),
          opcode(opcode),
          job_post_time(chrono::steady_clock::now())
    {
    }

    // Job bound to a TCP connection with an explicit job type.
    job_context(TCP_Transport *transport, uint8_t opcode, uint8_t type)
        : tcp_transport(transport),
          opcode(opcode),
          job_type(type),
          job_post_time(chrono::steady_clock::now())
    {
    }

    // Job bound to an RDMA connection, stamped with the current time.
    job_context(RDMA_Transport *transport, uint8_t opcode)
        : rdma_transport(transport),
          opcode(opcode),
          job_post_time(chrono::steady_clock::now())
    {
    }

    // Job bound to an RDMA connection with an explicit job type.
    job_context(RDMA_Transport *transport, uint8_t opcode, uint8_t type)
        : rdma_transport(transport),
          opcode(opcode),
          job_type(type),
          job_post_time(chrono::steady_clock::now())
    {
    }

    // Drops the references without releasing them (non-owning).
    ~job_context()
    {
        this->tcp_transport = NULL;
        this->rdma_transport = NULL;
        this->request_packet = NULL;
        this->request = NULL;
    }
};
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/thread_functions.cc
0 → 100644
View file @
d2d47b86
#ifndef __SERVER_FUNCTION_CC__
#define __SERVER_FUNCTION_CC__
#include <algorithm>
#include <chrono>
#include <set>
#include <vector>
#include <string>
#include "common.hpp"
#include "hash.hpp"
#include "thread_functions.hpp"
#include "thread_pool.hpp"
#include "threadsafe_queue.hpp"
#include "../transport_api/transport_config.hpp"
#include "cli_api.hpp"
#include "client_functions.hpp"
namespace
chrono
=
std
::
chrono
;
void
*
thread_function
(
void
*
)
{
job_context
*
job
=
NULL
;
char
*
packet
;
char
*
t
;
Common_Request
*
cr
;
uint8_t
transport_type
;
uint8_t
service_type
;
while
(
true
)
{
//change true loop to some variable dependent loop
while
(
job
==
NULL
)
{
job
=
job_queue
->
get_job
();
}
if
(
job
==
NULL
)
continue
;
packet
=
job
->
request_packet
;
transport_type
=
job
->
transport_type
;
service_type
=
job
->
service_type
;
cr
=
(
Common_Request
*
)
packet
;
switch
(
transport_type
)
{
case
TCP_IP_TRANSPORT
:
if
(
cr
->
opcode
==
READ
)
{
if
(
service_type
==
MASTER_SERVICE
)
{
if
(
tcp_nic_read
(
job
))
{
//err
printf
(
"Unsuccessful completion: tcp_nic_read
\n
"
);
}
else
{
printf
(
"Successful completion: tcp_nic_read
\n
"
);
}
}
else
{
if
(
tcp_server_read
(
job
))
{
printf
(
"Unsuccessful completion: tcp_server_read
\n
"
);
}
else
{
printf
(
"Successful completion: tcp_server_read
\n
"
);
}
}
}
else
{
if
(
service_type
==
MASTER_SERVICE
)
{
if
(
tcp_nic_write
(
job
))
{
printf
(
"Unsuccessful completion: tcp_nic_write
\n
"
);
}
else
{
printf
(
"Successful completion: tcp_nic_write
\n
"
);
}
}
else
{
if
(
tcp_server_replicate
(
job
))
{
printf
(
"Unsuccessful completion: tcp_server_replicate
\n
"
);
}
else
{
printf
(
"Successful completion: tcp_server_replicate
\n
"
);
}
}
}
break
;
case
RDMA_RC_TRANSPORT
:
if
(
cr
->
opcode
==
READ
)
{
if
(
service_type
==
MASTER_SERVICE
)
{
if
(
rdma_rc_nic_read
(
job
))
{
printf
(
"Unsuccessful completion: rdma_rc_nic_read
\n
"
);
}
else
{
printf
(
"Successful completion: rdma_rc_nic_read
\n
"
);
}
}
else
{
if
(
rdma_rc_server_read
(
job
))
{
printf
(
"Unsuccessful completion: rdma_rc_server_read
\n
"
);
}
else
{
printf
(
"Successful completion: rdma_rc_server_read
\n
"
);
}
}
}
else
{
if
(
service_type
==
MASTER_SERVICE
)
{
if
(
rdma_rc_nic_write
(
job
))
{
printf
(
"Unsuccessful completion: rdma_rc_nic_write
\n
"
);
}
else
{
printf
(
"Successful completion: rdma_rc_nic_write
\n
"
);
}
}
else
{
if
(
rdma_rc_server_replicate
(
job
))
{
printf
(
"Unsuccessful completion: rdma_rc_server_replicate
\n
"
);
}
else
{
printf
(
"Successful completion: rdma_rc_server_replicate
\n
"
);
}
}
}
break
;
}
delete
(
job
);
job
=
NULL
;
}
return
NULL
;
}
void
*
worker_function
(
void
*
)
{
job_context
*
job
=
NULL
;
char
*
packet
;
char
*
t
;
Common_Request
*
cr
;
uint8_t
job_transport_type
;
uint8_t
service_type
;
uint8_t
packet_type
;
while
(
true
)
{
while
(
job
==
NULL
)
{
job
=
request_queue
->
get_job
();
}
if
(
analyze
)
{
request_queue_wait_time
.
push_back
(
chrono
::
steady_clock
::
now
()
-
job
->
job_post_time
);
}
packet
=
job
->
request_packet
;
cr
=
job
->
request
;
job_transport_type
=
job
->
transport_type
;
service_type
=
job
->
service_type
;
// job type is implicit::TYPE_REQUEST since its from request queue
switch
(
job_transport_type
)
{
case
TCP_IP_TRANSPORT
:
//pass for now
break
;
case
RDMA_RC_TRANSPORT
:
switch
(
cr
->
opcode
)
{
case
READ
:
case
MASTER_SERVICE
:
switch
(
self_id
)
{
case
NIC
:
if
(
rdma_rc_nic_read
(
job
))
{
if
(
debug
)
{
printf
(
"Unsuccessful completion: rdma_rc_nic_read
\n
"
);
}
}
else
{
if
(
debug
)
{
printf
(
"Successful completion: rdma_rc_nic_read
\n
"
);
}
}
break
;
case
SERVER
:
if
(
rdma_rc_server_read
(
job
))
{
if
(
debug
)
{
printf
(
"Unsuccessful completion: rdma_rc_server_read
\n
"
);
}
}
else
{
if
(
debug
)
{
printf
(
"Successful completion: rdma_rc_server_read
\n
"
);
}
}
break
;
}
break
;
case
WRITE
:
switch
(
cr
->
service_type
)
{
case
MASTER_SERVICE
:
switch
(
self_id
)
{
case
NIC
:
if
(
rdma_rc_nic_write
(
job
))
{
if
(
debug
)
printf
(
"Unsuccessful completion: rdma_rc_nic_write
\n
"
);
}
else
{
if
(
debug
)
printf
(
"Successful completion: rdma_rc_nic_write
\n
"
);
}
req_cnt
++
;
break
;
case
SERVER
:
if
(
rdma_rc_server_write
(
job
))
{
if
(
debug
)
printf
(
"Unsuccessful completion: rdma_rc_server_write
\n
"
);
}
else
{
if
(
debug
)
printf
(
"Successful completion: rdma_rc_server_write
\n
"
);
}
req_cnt
++
;
break
;
}
break
;
case
BACKUP_SERVICE
:
if
(
rdma_rc_server_replicate
(
job
))
{
if
(
debug
)
printf
(
"Unsuccessful completion: rdma_rc_service_replicate
\n
"
);
}
else
{
if
(
debug
)
printf
(
"Successful completion: rdma_rc_service_replicate
\n
"
);
}
req_cnt
++
;
break
;
}
break
;
}
break
;
}
delete
(
job
);
job
=
NULL
;
}
return
NULL
;
}
void
*
client_worker_function
(
void
*
transport_ptr
)
{
// job will not contain transport
RDMA_Transport
*
transport
=
(
RDMA_Transport
*
)
transport_ptr
;
job_context
*
job
=
NULL
;
char
*
packet
;
char
*
t
;
Common_Request
*
cr
;
uint8_t
job_transport_type
;
uint8_t
opcode
;
char
*
blob
;
size_t
key_size
;
std
::
string
key
;
std
::
string
value
;
while
(
true
)
{
while
(
job
==
NULL
)
{
job
=
request_queue
->
get_job
();
}
packet
=
job
->
request_packet
;
cr
=
job
->
request
;
job_transport_type
=
job
->
transport_type
;
opcode
=
cr
->
opcode
;
switch
(
job_transport_type
)
{
case
TCP_IP_TRANSPORT
:
// pass for now
break
;
case
RDMA_RC_TRANSPORT
:
switch
(
opcode
)
{
case
READ
:
blob
=
job
->
request_packet
+
sizeof
(
Common_Request
);
key
=
std
::
string
(
get_key_ptr
(
blob
));
key_size
=
get_key_size_from_blob
(
blob
);
read_rpc
(
transport
,
key
.
c_str
(),
key_size
);
break
;
case
WRITE
:
blob
=
packet
+
sizeof
(
Common_Request
);
key
=
std
::
string
(
get_key_ptr
(
blob
));
key_size
=
get_key_size_from_blob
(
blob
);
value
=
std
::
string
(
get_val_ptr
(
blob
));
size_t
value_size
=
get_val_size_from_start_ptr
(
get_val_ptr
(
blob
));
write_rpc
(
transport
,
key
.
c_str
(),
key_size
,
value
.
c_str
(),
value_size
);
break
;
}
break
;
}
switch
(
job_transport_type
)
{
case
TCP_IP_TRANSPORT
:
//ignore
break
;
case
RDMA_RC_TRANSPORT
:
if
(
job
->
rdma_transport
->
get_mr_addr
()
!=
job
->
request_packet
)
{
free
(
job
->
request_packet
);
}
}
delete
(
job
);
job
=
NULL
;
}
return
NULL
;
}
// Placeholder: no data is written. When `analyze` is set, the elapsed time
// between two back-to-back clock reads is appended to append_log_time.
void append_to_log(char *buf, size_t buf_size)
{
    //ignore
    const auto began = chrono::steady_clock::now();
    const auto finished = chrono::steady_clock::now();

    if (!analyze)
    {
        return;
    }
    append_log_time.push_back(finished - began);
}
// Appends a key/value pair to storage_log.
// Returns 0 on success, or -1 (after printing "Log full") when add_to_log
// reports failure. When `analyze` is set, the duration of a successful append
// is recorded in append_log_time; failed appends are not timed.
int log_append(char *key, size_t key_size, char *value, size_t value_size)
{
    auto began = chrono::steady_clock::now();
    if (analyze)
    {
        began = chrono::steady_clock::now();
    }

    const bool rejected = storage_log.add_to_log(key, key_size, value, value_size) ? true : false;
    if (rejected)
    {
        printf("Log full\n");
        return -1;
    }

    if (analyze)
    {
        const auto finished = chrono::steady_clock::now();
        append_log_time.push_back(finished - began);
    }
    return 0;
}
// TCP read on the server side is not implemented: the request is ignored and
// success (0) is always reported.
int tcp_server_read(job_context *job)
{
    (void)job; //ignore the request
    return 0;
}
// Handles a WRITE received over TCP on the master:
//   1. forwards a private copy of the request (header + blob), retagged as
//      BACKUP_SERVICE, to every replica returned by get_tcp_replicas();
//   2. appends the key/value pair to the local storage log;
//   3. waits for one response per replica in response_buffer (the process
//      exits if a poll times out);
//   4. queues a response to the requester: STATUS_OK if every replica acked,
//      STATUS_INTERNAL_ERROR otherwise.
//
// Returns 0 when all replicas acknowledged, -1 otherwise.
int tcp_server_write(job_context *job)
{
    char *packet = job->request_packet;
    Common_Request *w = job->request;
    const size_t blob_size = w->req.w_request.length;
    const size_t final_size = sizeof(Common_Request) + blob_size;
    char *blob = packet + sizeof(Common_Request);
    bool all_acked = true;

    w->service_type = BACKUP_SERVICE;

    std::vector<TCP_Transport *> replica_conns = get_tcp_replicas(w->req.w_request.rpcId);

    // Fan the request out; every replica gets its own heap copy.
    for (TCP_Transport *transport : replica_conns)
    {
        char *request_to_send = (char *)malloc(final_size);
        if (request_to_send == NULL)
        {
            // Fatal, like the other unrecoverable conditions in this file.
            printf("Out of memory\n");
            exit(-1);
        }
        memcpy(request_to_send, w, final_size);
        ((Common_Request *)request_to_send)->service_type = BACKUP_SERVICE;

        job_context *job_to_send = new job_context(transport, WRITE);
        job_to_send->job_type = TYPE_REQUEST;
        job_to_send->service_type = BACKUP_SERVICE;
        job_to_send->request = (Common_Request *)request_to_send;
        job_to_send->request_packet = request_to_send;
        job_to_send->job_post_time = chrono::steady_clock::now();
        send_queue->enqueue(job_to_send);
    }

    // append to log
    char *key_ptr = get_key_ptr(blob);
    size_t key_size = get_key_size_from_start_ptr(key_ptr);
    char *value_ptr = get_val_ptr(blob);
    size_t value_size = get_val_size_from_start_ptr(value_ptr);
    if (storage_log.add_to_log(key_ptr, key_size, value_ptr, value_size))
    {
        printf("Log full\n");
    }

    // get acks
    for (TCP_Transport *transport : replica_conns)
    {
        int response_buffer_pos = -1;
        const auto poll_start_time = chrono::steady_clock::now();
        auto poll_end_time = poll_start_time;

        // Keep polling while no slot has been found and the timeout has not
        // elapsed. The test is on the slot index, not on the buffer pointer.
        do
        {
            response_buffer_pos = response_buffer->poll(transport->get_conn_fd());
            poll_end_time = chrono::steady_clock::now();
        } while (response_buffer_pos < 0 &&
                 (poll_end_time - poll_start_time) < worker_response_buffer_poll_timeout);

        if (response_buffer_pos < 0)
        {
            printf("Response Buffer timeout\n");
            //send Nack
            exit(-1);
        }

        Common_Request *replica_ack = response_buffer->buffer[response_buffer_pos].cr;
        if (replica_ack->opcode == STATUS_OK)
        {
            if (debug)
            {
                printf("Ack received\n");
            }
        }
        else
        {
            if (debug)
            {
                printf("Nack received\n");
            }
            all_acked = false;
        }
        response_buffer->erase_element(transport->get_conn_fd());
    }

    // Build the response for the original requester.
    Common_Request *ack = new Common_Request();
    ack->type = TYPE_RESPONSE;

    job_context *job_to_send = new job_context(job->tcp_transport, STATUS_OK);
    job_to_send->job_type = TYPE_RESPONSE;
    job_to_send->request = ack;
    job_to_send->request_packet = (char *)ack;

    if (!all_acked)
    {
        printf("All Acks not received\n");
        // send Nack
        ack->opcode = STATUS_INTERNAL_ERROR;
        ack->req.w_response.common.status = STATUS_INTERNAL_ERROR;
        job_to_send->opcode = STATUS_INTERNAL_ERROR;
    }
    else
    {
        // Receivers test `opcode == STATUS_OK`, so the positive ack has to
        // carry it explicitly (as the RDMA write path does).
        ack->opcode = STATUS_OK;
        ack->req.w_response.common.status = STATUS_OK;
    }

    job_to_send->job_post_time = chrono::steady_clock::now();
    send_queue->enqueue(job_to_send);

    return all_acked ? 0 : -1;
}
// Serves a READ over TCP from the local storage log.
//
// The key is taken from the bytes that follow the request header. If the log
// has no object for it, a READ response with STATUS_INVALID_OBJECT and length
// 0 is queued and the function returns 0. Otherwise a response packet
// (header + value + one trailing zero byte) is queued with STATUS_OK, and the
// function waits for the requester's acknowledgement in response_buffer; the
// process exits if that poll times out. With `analyze` set, the total service
// time of a successful read is recorded in worker_read_service_time.
//
// Always returns 0.
int tcp_nic_read(job_context *job)
{
    auto service_began = chrono::steady_clock::now();
    if (analyze)
    {
        service_began = chrono::steady_clock::now();
    }

    char *key = job->request_packet + sizeof(Common_Request);
    const size_t key_size = get_key_size_from_start_ptr(key);
    (void)key_size;
    struct Seglet *found = storage_log.check_log_for_obj(key);

    if (found == NULL)
    {
        // invalid request
        // send Nack
        Common_Request *nack = new Common_Request();
        nack->opcode = READ;
        nack->type = TYPE_RESPONSE;
        nack->req.r_response.common.status = STATUS_INVALID_OBJECT;
        nack->req.r_response.length = 0;

        job_context *nack_job = new job_context(job->tcp_transport, STATUS_INVALID_OBJECT);
        nack_job->job_type = TYPE_RESPONSE;
        nack_job->request = nack;
        nack_job->request_packet = (char *)nack;
        nack_job->job_post_time = chrono::steady_clock::now();
        send_queue->enqueue(nack_job);
        return 0;
    }

    // valid case
    const size_t value_size = get_val_size_from_start_ptr(found->value);
    const size_t final_size = sizeof(Common_Request) + value_size + 1;

    // Zero-filled packet: header, then the value, then one trailing zero byte.
    char *packet_to_send = (char *)calloc(1, final_size);
    memcpy(packet_to_send + sizeof(Common_Request), found->value, value_size);

    Common_Request *reply = (Common_Request *)packet_to_send;
    reply->opcode = READ;
    reply->type = TYPE_RESPONSE;
    reply->req.r_response.common.status = STATUS_OK;
    reply->req.r_response.length = value_size + 1;

    job_context *reply_job = new job_context(job->tcp_transport, STATUS_OK);
    reply_job->job_type = TYPE_RESPONSE;
    reply_job->request = reply;
    reply_job->request_packet = packet_to_send;
    reply_job->job_post_time = chrono::steady_clock::now();
    send_queue->enqueue(reply_job);

    // wait for ack
    int slot = -1;
    const auto poll_began = chrono::steady_clock::now();
    for (;;)
    {
        slot = response_buffer->poll(job->tcp_transport->get_conn_fd());
        const auto poll_now = chrono::steady_clock::now();
        if (!(slot < 0) || !((poll_now - poll_began) < worker_response_buffer_poll_timeout))
        {
            break;
        }
    }
    if (slot == -1)
    {
        printf("Response not received\n");
        exit(-1);
    }

    Common_Request *ack = response_buffer->buffer[slot].cr;
    const bool acked = (ack->opcode == STATUS_OK);
    if (debug)
    {
        if (acked)
            printf("Ack received\n");
        else
            printf("Nack received\n");
    }
    response_buffer->erase_element(job->tcp_transport->get_conn_fd());

    if (analyze)
    {
        const auto service_ended = chrono::steady_clock::now();
        worker_read_service_time.push_back((service_ended - service_began));
    }
    return 0;
}
// Serves a READ over RDMA RC on the server.
//
// THE CR STRUCTURE FOR READ IS
/*
    -------------------------------------------
    |       HEADER       |   KEY   |   NULL   |
    |____________________|_________|__________|
*/
// The value is looked up in the storage log and copied into the connection's
// registered memory region, then a "prep" response (READ / STATUS_OK,
// length 0) is queued. The function then waits for the peer's acknowledgement
// in response_buffer; the process exits if that poll times out. With
// `analyze` set, the total service time is recorded in
// worker_read_service_time.
//
// Returns 0 on success, -1 if the key is unknown or the value does not fit
// into the memory region (no response is queued in either case).
int rdma_rc_server_read(job_context *job)
{
    const auto start_time = chrono::steady_clock::now();

    char *blob = job->request_packet + sizeof(Common_Request);
    char *key = get_key_ptr(blob);
    struct Seglet *value_ptr = storage_log.check_log_for_obj(key);
    if (value_ptr == NULL)
    {
        // invalid request
        return -1;
    }

    // valid case
    // Read the stored value through the seglet's `value` field, exactly as
    // rdma_rc_nic_read and tcp_nic_read do, instead of reinterpreting the
    // Seglet pointer itself as the value bytes.
    const size_t value_size = get_val_size_from_start_ptr(value_ptr->value);

    // Never write past the end of the registered memory region.
    if (value_size > static_cast<size_t>(job->rdma_transport->get_mr_size()))
    {
        printf("Value too large for memory region\n");
        return -1;
    }

    // copy value to MR
    memset(job->rdma_transport->get_mr_addr(), 0, job->rdma_transport->get_mr_size());
    memcpy(job->rdma_transport->get_mr_addr(), value_ptr->value, value_size);

    // send prep req to client to read MR
    // Value-initialised so that fields not set below do not go out with
    // indeterminate contents.
    Common_Request *prep_req = new Common_Request();
    prep_req->opcode = READ;
    prep_req->type = TYPE_RESPONSE;
    prep_req->req.r_response.common.status = STATUS_OK;
    prep_req->req.r_response.length = 0;

    job_context *job_to_send = new job_context(job->rdma_transport, STATUS_OK);
    job_to_send->job_type = TYPE_RESPONSE;
    job_to_send->request = prep_req;
    job_to_send->request_packet = (char *)prep_req;
    job_to_send->job_post_time = chrono::steady_clock::now();
    send_queue->enqueue(job_to_send);

    // wait for ack
    int response_buffer_pos = -1;
    const auto response_poll_start_time = chrono::steady_clock::now();
    auto response_poll_end_time = response_poll_start_time;
    do
    {
        response_buffer_pos = response_buffer->poll(job->rdma_transport->get_conn_fd());
        response_poll_end_time = chrono::steady_clock::now();
    } while (response_buffer_pos < 0 &&
             (response_poll_end_time - response_poll_start_time) < worker_response_buffer_poll_timeout);

    if (response_buffer_pos < 0)
    {
        printf("Response buffer poll timeout\n");
        exit(-1);
    }

    Common_Request *ack = response_buffer->buffer[response_buffer_pos].cr;
    if (ack->opcode == STATUS_OK)
    {
        if (debug)
            printf("Ack received\n");
    }
    else
    {
        if (debug)
            printf("NAck Received\n");
    }
    response_buffer->erase_element(job->rdma_transport->get_conn_fd());

    if (analyze)
    {
        const auto end_time = chrono::steady_clock::now();
        worker_read_service_time.push_back((end_time - start_time));
    }
    return 0;
}
// Handles a WRITE received over RDMA RC on the master:
//   1. retags the request header in place as a BACKUP_SERVICE write;
//   2. copies header + blob from the requester's memory region into each
//      replica's memory region and queues a zero-length "prep" request for it;
//   3. appends the key/value pair to the local log via log_append();
//   4. waits for one response per replica in response_buffer (the process
//      exits if a poll times out);
//   5. if every replica acked, queues a STATUS_OK response to the requester.
//
// Returns 0 when all replicas acknowledged, -1 otherwise (in which case no
// response is queued for the requester).
//
// NOTE(review): every replica job points at the same prep_request object, and
// nothing here releases it. Confirm who owns it once the jobs are consumed.
int rdma_rc_server_write(job_context *job)
{
    auto ack_wait_began = chrono::steady_clock::now();

    char *const packet = job->request_packet;
    Common_Request *const w = (Common_Request *)packet;
    const size_t blob_size = w->req.w_request.length;
    char *const blob = packet + sizeof(Common_Request);

    Common_Request *prep_request = new Common_Request();
    prep_request->opcode = WRITE;
    prep_request->req.w_request.length = 0;
    prep_request->req.w_request.common.opcode = WRITE;
    prep_request->service_type = BACKUP_SERVICE;
    prep_request->type = TYPE_REQUEST;

    bool every_replica_acked = true;

    //change required attrs in packet common request
    w->opcode = WRITE;
    w->type = TYPE_REQUEST;
    w->service_type = BACKUP_SERVICE;
    w->req.w_request.common.opcode = WRITE;
    w->req.w_request.common.service = BACKUP_SERVICE;

    //get replica connection objects
    std::vector<RDMA_Transport *> replica_conns;
    replica_conns = get_rdma_replicas(w->req.w_request.rpcId);

    // copy packet to mrs and send prep requests
    // packet already exists mr
    for (size_t idx = 0; idx < replica_conns.size(); ++idx)
    {
        RDMA_Transport *replica = replica_conns[idx];

        memcpy(replica->get_mr_addr(),
               job->rdma_transport->get_mr_addr(),
               blob_size + sizeof(Common_Request));

        job_context *prep_job = new job_context(replica, job->opcode);
        prep_job->job_type = TYPE_REQUEST;
        prep_job->request = prep_request;
        prep_job->request_packet = (char *)prep_request;
        prep_job->service_type = prep_request->service_type;
        prep_job->job_post_time = chrono::steady_clock::now();
        send_queue->enqueue(prep_job);
    }

    // append data to own log
    char *key_ptr = get_key_ptr(blob);
    const size_t key_size = get_key_size_from_blob(blob);
    char *value_ptr = get_val_ptr(blob);
    const size_t value_size = get_val_size_from_start_ptr(value_ptr);
    if (log_append(key_ptr, key_size, value_ptr, value_size))
    {
        printf("Log full\n");
    }

    //get acks
    if (analyze)
    {
        ack_wait_began = chrono::steady_clock::now();
    }

    for (size_t idx = 0; idx < replica_conns.size(); ++idx)
    {
        RDMA_Transport *replica = replica_conns[idx];

        //need to poll response buffer
        int slot = -1;
        const auto poll_began = chrono::steady_clock::now();
        for (;;)
        {
            slot = response_buffer->poll(replica->get_conn_fd());
            const auto poll_now = chrono::steady_clock::now();
            if (!(slot < 0) || !((poll_now - poll_began) < worker_response_buffer_poll_timeout))
            {
                break;
            }
        }

        if (analyze)
        {
            response_buffer_wait_time.push_back(chrono::steady_clock::now() - job->job_post_time);
        }

        if (slot == -1)
        {
            printf("Response buffer poll timeout\n");
            exit(-1);
        }

        Common_Request *replica_ack = response_buffer->buffer[slot].cr;
        const bool acked = (replica_ack->opcode == STATUS_OK);
        if (debug)
        {
            if (acked)
                printf("Ack received\n");
            else
                printf("NAck Received\n");
        }
        if (!acked)
        {
            every_replica_acked = false;
        }
        response_buffer->erase_element(replica->get_conn_fd());
    }

    if (analyze)
    {
        const auto ack_wait_ended = chrono::steady_clock::now();
        master_backup_ack_time.push_back(ack_wait_ended - ack_wait_began);
    }

    if (!every_replica_acked)
    {
        //send err
        printf("Error in receiving ack\n");
        return -1;
    }

    Common_Request *ack = new Common_Request();
    ack->opcode = STATUS_OK;
    ack->req.w_request.length = 0;
    ack->req.w_response.common.status = STATUS_OK;
    ack->type = TYPE_RESPONSE;

    job_context *ack_job = new job_context(job->rdma_transport, STATUS_OK);
    ack_job->job_type = TYPE_RESPONSE;
    ack_job->request = ack;
    ack_job->request_packet = (char *)ack;
    ack_job->job_post_time = chrono::steady_clock::now();
    send_queue->enqueue(ack_job);

    return 0;
}
/*
 * NIC-side handler for a client READ over RDMA.
 *
 * Looks the requested key up in storage_log. On a miss, a READ response with
 * STATUS_INVALID_OBJECT is queued for the client and -1 is returned. On a hit
 * the stored value is copied into the client connection's memory region, a
 * READ response with STATUS_OK is queued, and the function polls
 * response_buffer for the client's acknowledgement.
 *
 * Returns 0 once an acknowledgement (ACK or NACK) has been consumed, and -1
 * when the key is unknown or the value does not fit in the memory region.
 * The process exits if no acknowledgement arrives within
 * worker_response_buffer_poll_timeout.
 */
int rdma_rc_nic_read(job_context *job) {
    // THE CR STRUCTURE FOR READ IS
    /*
    -------------------------------------------
    |       HEADER       |   KEY   |   NULL   |
    |____________________|_________|__________|
    */
    const auto start_time = chrono::steady_clock::now();
    RDMA_Transport *client = job->rdma_transport;

    // Wraps a response header in a job and hands it to the send queue.
    auto post_response = [client](Common_Request *response) {
        job_context *outgoing = new job_context(client, STATUS_OK);
        outgoing->job_type = TYPE_RESPONSE;
        outgoing->request = response;
        outgoing->request_packet = (char *)response;
        outgoing->job_post_time = chrono::steady_clock::now();
        send_queue->enqueue(outgoing);
    };

    // Allocates a value-initialised READ response header. The parentheses
    // matter: a plain `new Common_Request` would leave fields indeterminate.
    auto new_read_response = []() -> Common_Request * {
        Common_Request *response = new Common_Request();
        response->opcode = READ;
        response->type = TYPE_RESPONSE;
        response->req.r_response.length = 0;
        return response;
    };

    // The key blob follows the fixed-size header in the request packet.
    char *blob = job->request_packet + sizeof(Common_Request);
    char *key = get_key_ptr(blob);
    struct Seglet *value_ptr = storage_log.check_log_for_obj(key);

    if (value_ptr == NULL) {
        // invalid request: answer with an error instead of a prep request
        Common_Request *miss = new_read_response();
        miss->req.r_response.common.status = STATUS_INVALID_OBJECT;
        post_response(miss);
        return -1;
    }

    // valid case
    const size_t value_size = get_val_size_from_start_ptr(value_ptr->value);
    if (value_size > (size_t)client->get_mr_size()) {
        // Copying would write past the end of the memory region.
        printf("Value too large for memory region\n");
        Common_Request *too_big = new_read_response();
        too_big->req.r_response.common.status = STATUS_INTERNAL_ERROR;
        post_response(too_big);
        return -1;
    }

    // copy value to MR
    memset(client->get_mr_addr(), 0, client->get_mr_size());
    memcpy(client->get_mr_addr(), value_ptr->value, value_size);

    // send prep req to client to read MR
    Common_Request *prep_req = new_read_response();
    prep_req->req.r_response.common.status = STATUS_OK;
    post_response(prep_req);

    // wait for ack
    int response_buffer_pos = -1;
    const auto response_poll_start_time = chrono::steady_clock::now();
    do {
        response_buffer_pos = response_buffer->poll(client->get_conn_fd());
    } while (response_buffer_pos < 0 &&
             (chrono::steady_clock::now() - response_poll_start_time) < worker_response_buffer_poll_timeout);

    if (response_buffer_pos < 0) {
        printf("Response buffer poll timeout\n");
        exit(-1);
    }

    Common_Request *ack = response_buffer->buffer[response_buffer_pos].cr;
    if (ack->opcode == STATUS_OK) {
        if (debug) printf("Ack received\n");
    } else {
        if (debug) printf("NAck Received\n");
    }
    response_buffer->erase_element(client->get_conn_fd());

    if (analyze) {
        const auto end_time = chrono::steady_clock::now();
        worker_read_service_time.push_back((end_time - start_time));
    }
    return 0;
}
std
::
vector
<
TCP_Transport
*>
get_tcp_replicas
(
uint64_t
key
)
{
std
::
set
<
TCP_Transport
*>
in_transports
;
uint32_t
hash_val
=
hash
(
key
,
num_servers
);
in_transports
.
insert
(
common
::
tcp_transport_map
[
hash_val
]);
size_t
p_size
;
size_t
a_size
;
for
(
int
i
=
0
,
c
=
0
;
i
<
num_servers
&&
c
<
num_replicas
;
i
++
)
{
p_size
=
in_transports
.
size
();
in_transports
.
insert
(
common
::
tcp_transport_map
[
i
]);
a_size
=
in_transports
.
size
();
if
(
p_size
!=
a_size
)
c
++
;
}
std
::
vector
<
TCP_Transport
*>
ret_transports
(
in_transports
.
begin
(),
in_transports
.
end
());
return
ret_transports
;
}
std
::
vector
<
RDMA_Transport
*>
get_rdma_replicas
(
uint64_t
key
)
{
std
::
set
<
RDMA_Transport
*>
in_transports
;
uint32_t
hash_val
=
hash
(
key
,
num_servers
);
in_transports
.
insert
(
common
::
rdma_transport_map
[
hash_val
]);
size_t
p_size
;
size_t
a_size
;
for
(
int
i
=
0
,
c
=
0
;
i
<
num_servers
&&
c
<
num_replicas
;
i
++
)
{
p_size
=
in_transports
.
size
();
in_transports
.
insert
(
common
::
rdma_transport_map
[
i
]);
a_size
=
in_transports
.
size
();
if
(
p_size
!=
a_size
)
c
++
;
}
std
::
vector
<
RDMA_Transport
*>
ret_transports
(
in_transports
.
begin
(),
in_transports
.
end
());
return
ret_transports
;
}
/*
 * Backup-server handler for a replicated write received over TCP.
 *
 * Appends the payload that follows the Common_Request header to the log,
 * then sends a STATUS_OK header back on the same connection.
 *
 * Returns 0 when the acknowledgement was sent and -1 when send_data()
 * reported a failure.
 */
int tcp_server_replicate(job_context *job) {
    char *request_bytes = job->request_packet;
    Common_Request *header = (Common_Request *)request_bytes;
    size_t payload_size = header->req.w_request.length;
    char *payload = request_bytes + sizeof(struct Common_Request);
    append_to_log(payload, payload_size);

    // now to send ack back
    TCP_Transport *conn = job->tcp_transport;
    Common_Request *reply = new Common_Request();
    reply->opcode = STATUS_OK;
    reply->req.w_response.common.status = STATUS_OK;
    conn->mr = (char *)reply;
    conn->mr_size = sizeof(struct Common_Request);

    // send_data() returns non-zero on error
    const int rc = conn->send_data() ? -1 : 0;
    delete reply;
    return rc;
}
/*
 * Backup-server handler for a replicated write received over RDMA.
 *
 * Pulls the request into the local memory region with a one-sided read,
 * appends the key/value pair found after the Common_Request header to the
 * log, and queues a STATUS_OK response on send_queue.
 *
 * Returns 0 after the response has been queued and -1 when the one-sided
 * read fails.
 *
 * NOTE(review): a failing log_append() is only printed; the response still
 * carries STATUS_OK. Confirm whether the master should be told.
 */
int rdma_rc_server_replicate(job_context *job) {
    const auto start_time = chrono::steady_clock::now();

    RDMA_Transport *rdma_transport = job->rdma_transport;
    if (rdma_transport->one_sided_read()) {
        perror("Unable to read replicate request");
        return -1;
    }

    // The request now sits in the memory region: header first, then the blob.
    char *packet = rdma_transport->get_mr_addr();
    char *blob = packet + sizeof(struct Common_Request);

    // Append to log
    char *key_ptr = get_key_ptr(blob);
    size_t key_size = get_key_size_from_blob(blob);
    char *value_ptr = get_val_ptr(blob);
    size_t value_size = get_val_size_from_start_ptr(value_ptr);
    if (log_append(key_ptr, key_size, value_ptr, value_size)) {
        printf("Log full\n");
    }

    // now to send ack back
    Common_Request *w = new Common_Request();
    w->opcode = STATUS_OK;
    w->req.w_request.length = 0;
    w->req.w_response.common.status = STATUS_OK;
    w->type = TYPE_RESPONSE;

    job_context *job_to_send = new job_context(job->rdma_transport, STATUS_OK);
    job_to_send->job_type = TYPE_RESPONSE;
    job_to_send->opcode = STATUS_OK;
    job_to_send->request = w;
    job_to_send->request_packet = (char *)w;
    job_to_send->job_post_time = chrono::steady_clock::now();
    send_queue->enqueue(job_to_send);

    if (analyze) {
        const auto end_time = chrono::steady_clock::now();
        worker_write_service_time.push_back(end_time - start_time);
    }
    return 0;
}
/*
 * NIC-side handler for a client WRITE over TCP.
 *
 * Forwards the request packet (header plus blob) to every replica returned
 * by get_tcp_replicas(), collects one acknowledgement header from each
 * replica that was reached, and sends a single status header back to the
 * client: STATUS_OK when every replica acknowledged with STATUS_OK,
 * STATUS_INTERNAL_ERROR otherwise.
 *
 * Returns 0 on full success and -1 when any replica failed or the reply to
 * the client could not be sent.
 */
int tcp_nic_write(job_context *job) {
    // in case of tcp, no need for prep request
    char *packet = job->request_packet;
    Common_Request *w = (Common_Request *)packet;
    size_t blob_size = w->req.w_request.length;
    bool all_acked = true;

    // get replica connection objects
    std::vector<TCP_Transport *> replica_conns = get_tcp_replicas(w->req.w_request.rpcId);
    w->service_type = BACKUP_SERVICE;

    // Only replicas the request actually reached are waited on; otherwise
    // recv_data() would wait on a connection that was never sent anything.
    std::vector<TCP_Transport *> awaiting_ack;
    for (TCP_Transport *transport : replica_conns) {
        transport->mr = packet;
        transport->mr_size = sizeof(Common_Request) + blob_size;
        if (transport->send_data()) {  // non-zero means the send failed
            all_acked = false;
        } else {
            awaiting_ack.push_back(transport);
        }
    }

    // get acknowledgements
    Common_Request ack;
    for (TCP_Transport *transport : awaiting_ack) {
        // Pre-load an error opcode so a failed or short receive is never
        // mistaken for an acknowledgement.
        memset(&ack, 0, sizeof(Common_Request));
        ack.opcode = STATUS_INTERNAL_ERROR;
        transport->recv_data((char *)&ack, sizeof(ack));
        if (!(ack.opcode == STATUS_OK)) {
            all_acked = false;
        }
    }

    memset(&ack, 0, sizeof(Common_Request));
    if (all_acked) {
        ack.opcode = STATUS_OK;
        ack.req.w_response.common.status = STATUS_OK;
    } else {
        ack.opcode = STATUS_INTERNAL_ERROR;
        ack.req.w_response.common.status = STATUS_INTERNAL_ERROR;
    }

    TCP_Transport *client_conn = job->tcp_transport;
    client_conn->mr = (char *)&ack;
    client_conn->mr_size = sizeof(ack);
    if (client_conn->send_data()) {
        return -1;
    }
    return all_acked ? 0 : -1;
}
/*
 * NIC-side handler for a client WRITE over RDMA.
 *
 * Steps:
 *   1. Rewrite the request header so it addresses the backup service.
 *   2. Copy header plus blob from the client's memory region into each
 *      replica's memory region and queue a zero-length WRITE prep request
 *      for that replica on send_queue.
 *   3. Append the key/value pair to the local log.
 *   4. Poll response_buffer for one acknowledgement per replica.
 *   5. Queue a response for the client: STATUS_OK when every replica
 *      acknowledged with STATUS_OK, STATUS_INTERNAL_ERROR otherwise.
 *
 * Returns 0 on success and -1 when a replica NACKed or the request does not
 * fit in a memory region. The process exits if a replica does not answer
 * within worker_response_buffer_poll_timeout.
 *
 * NOTE(review): all replica jobs share one prep request object; confirm the
 * send-queue consumer does not free job->request per job.
 */
int rdma_rc_nic_write(job_context *job) {
    auto ack_start_time = chrono::steady_clock::now();
    RDMA_Transport *client = job->rdma_transport;

    char *packet = job->request_packet;
    Common_Request *w = (Common_Request *)packet;
    const size_t blob_size = w->req.w_request.length;
    const size_t packet_size = blob_size + sizeof(Common_Request);
    char *blob = packet + sizeof(Common_Request);

    // Queues the final status header for the client.
    auto reply_to_client = [job, client](bool ok) {
        Common_Request *reply = new Common_Request();
        reply->req.w_request.length = 0;
        if (ok) {
            reply->opcode = STATUS_OK;
            reply->req.w_response.common.status = STATUS_OK;
        } else {
            reply->opcode = STATUS_INTERNAL_ERROR;
            reply->req.w_response.common.status = STATUS_INTERNAL_ERROR;
        }
        reply->type = TYPE_RESPONSE;
        reply->request_start_time = job->request->request_start_time;

        job_context *outgoing = new job_context(client, STATUS_OK);
        outgoing->job_type = TYPE_RESPONSE;
        outgoing->request = reply;
        outgoing->request_packet = (char *)reply;
        outgoing->job_post_time = chrono::steady_clock::now();
        send_queue->enqueue(outgoing);
    };

    // change required attrs in packet common request
    w->opcode = WRITE;
    w->type = TYPE_REQUEST;
    w->service_type = BACKUP_SERVICE;
    w->req.w_request.common.opcode = WRITE;
    w->req.w_request.common.service = BACKUP_SERVICE;

    // get replica connection objects
    std::vector<RDMA_Transport *> replica_conns = get_rdma_replicas(w->req.w_request.rpcId);

    // The length field comes from the request itself, so check it against
    // every memory region involved before any bytes are copied.
    bool fits = packet_size <= (size_t)client->get_mr_size();
    for (RDMA_Transport *transport : replica_conns) {
        if (packet_size > (size_t)transport->get_mr_size()) {
            fits = false;
        }
    }
    if (!fits) {
        printf("Write request too large for memory region\n");
        reply_to_client(false);
        return -1;
    }

    // One zero-length prep request is shared by all replica jobs; it is only
    // allocated when there is at least one replica to hand it to.
    Common_Request *prep_request = NULL;
    if (!replica_conns.empty()) {
        prep_request = new Common_Request();
        prep_request->opcode = WRITE;
        prep_request->req.w_request.length = 0;
        prep_request->req.w_request.common.opcode = WRITE;
        prep_request->service_type = BACKUP_SERVICE;
        prep_request->type = TYPE_REQUEST;
    }

    // copy packet to mrs and send prep requests
    for (RDMA_Transport *transport : replica_conns) {
        memcpy(transport->get_mr_addr(), client->get_mr_addr(), packet_size);
        job_context *job_to_send = new job_context(transport, job->opcode);
        job_to_send->job_type = TYPE_REQUEST;
        job_to_send->request = prep_request;
        job_to_send->request_packet = (char *)prep_request;
        job_to_send->service_type = prep_request->service_type;
        job_to_send->job_post_time = chrono::steady_clock::now();
        send_queue->enqueue(job_to_send);
    }

    // Append to log
    char *key_ptr = get_key_ptr(blob);
    size_t key_size = get_key_size_from_blob(blob);
    char *value_ptr = get_val_ptr(blob);
    size_t value_size = get_val_size_from_start_ptr(value_ptr);
    if (log_append(key_ptr, key_size, value_ptr, value_size)) {
        printf("Log full\n");
    }

    // get acks
    if (analyze) {
        ack_start_time = chrono::steady_clock::now();
    }
    bool flag = true;
    for (RDMA_Transport *transport : replica_conns) {
        // need to poll response buffer
        int response_buffer_pos = -1;
        const auto poll_start_time = chrono::steady_clock::now();
        do {
            response_buffer_pos = response_buffer->poll(transport->get_conn_fd());
        } while (response_buffer_pos < 0 &&
                 (chrono::steady_clock::now() - poll_start_time) < worker_response_buffer_poll_timeout);

        if (analyze) {
            response_buffer_wait_time.push_back(chrono::steady_clock::now() - job->job_post_time);
        }
        if (response_buffer_pos < 0) {
            printf("Response buffer poll timeout\n");
            exit(-1);
        }

        Common_Request *ack = response_buffer->buffer[response_buffer_pos].cr;
        if (ack->opcode == STATUS_OK) {
            if (debug) printf("Ack received\n");
        } else {
            if (debug) printf("NAck Received\n");
            flag = false;
        }
        response_buffer->erase_element(transport->get_conn_fd());
    }
    if (analyze) {
        const auto ack_end_time = chrono::steady_clock::now();
        master_backup_ack_time.push_back(ack_end_time - ack_start_time);
    }

    if (!flag) {
        // send err: the client is waiting for a response either way
        printf("Error in receiving ack\n");
        reply_to_client(false);
        return -1;
    }
    reply_to_client(true);
    return 0;
}
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/thread_functions.hpp
0 → 100644
View file @
d2d47b86
#ifndef __THREAD_FUNCTIONS_H__
#define __THREAD_FUNCTIONS_H__
#include <vector>
#include "threadsafe_queue.hpp"
#include "../transport_api/transport_config.hpp"
//void thread_function(void);
// Thread entry points. Each takes and returns an opaque pointer, matching
// the signature Thread_Pool stores for its worker function.
void *thread_function(void *);
void *worker_function(void *);
void *client_worker_function(void *transport_ptr);

// TCP request handlers. Each receives the job to service and returns an int
// status (presumably 0 on success and negative on failure -- confirm
// against the definitions).
int tcp_server_read(job_context *job);
int tcp_server_write(job_context *job);
int tcp_server_replicate(job_context *job);
int tcp_nic_read(job_context *job);
int tcp_nic_write(job_context *job);

// RDMA (reliable connection) request handlers, same calling convention.
int rdma_rc_server_read(job_context *job);
int rdma_rc_server_write(job_context *job);
int rdma_rc_server_replicate(job_context *job);
int rdma_rc_nic_read(job_context *job);
int rdma_rc_nic_write(job_context *job);

// Log helpers: append a raw buffer, or a key/value pair given as separate
// pointer/length arguments.
void append_to_log(char *buf, size_t buf_size);
int log_append(char *key, size_t key_size, char *value, size_t value_size);

// Replica selection: the connections a given key is replicated to.
std::vector<TCP_Transport *> get_tcp_replicas(uint64_t key);
std::vector<RDMA_Transport *> get_rdma_replicas(uint64_t key);
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/thread_pool.cc
0 → 100644
View file @
d2d47b86
#ifndef __THREAD_POOL_CC__
#define __THREAD_POOL_CC__
#include <vector>
#include <string>
#include <thread>
#include "thread_pool.hpp"
// Creates a pool of `n` threads with no worker function yet and a freshly
// allocated job queue. Every pointer member is initialised so that
// start_threads() never reads an indeterminate value.
Thread_Pool::Thread_Pool(size_t n)
    : num_threads(n),
      __function(NULL),
      _funtion(NULL),
      args(NULL),
      job_queue(new Thread_Safe_Queue()) {
}
// Creates an empty pool (zero threads, no worker function) that uses the
// caller-supplied queue `q`. Every pointer member is initialised so that
// start_threads() never reads an indeterminate value.
Thread_Pool::Thread_Pool(Thread_Safe_Queue *q)
    : num_threads(0),
      __function(NULL),
      _funtion(NULL),
      args(NULL),
      job_queue(q) {
}
// Creates a pool of `n` threads on the caller-supplied queue `q`, with no
// worker function yet. Every pointer member is initialised so that
// start_threads() never reads an indeterminate value.
Thread_Pool::Thread_Pool(Thread_Safe_Queue *q, size_t n)
    : num_threads(n),
      __function(NULL),
      _funtion(NULL),
      args(NULL),
      job_queue(q) {
}
// Creates a pool of `n` threads on queue `q` that will run `f`. The worker
// argument defaults to NULL; it was previously left uninitialised and then
// passed to every thread by start_threads().
Thread_Pool::Thread_Pool(Thread_Safe_Queue *q, size_t n, void *(*f)(void *))
    : num_threads(n),
      __function(f),
      _funtion(NULL),
      args(NULL),
      job_queue(q) {
}
// Creates a pool of `n` threads on queue `q` that will run `f(args)`.
// The unused no-argument function pointer is initialised to NULL instead of
// being left indeterminate.
Thread_Pool::Thread_Pool(Thread_Safe_Queue *q, size_t n, void *(*f)(void *), void *args)
    : num_threads(n),
      __function(f),
      _funtion(NULL),
      args(args),
      job_queue(q) {
}
// Waits for every thread that was actually started. Iterating over
// thread_arr (instead of counting to num_threads) stays in bounds when
// start_threads() was never called or num_threads changed afterwards.
Thread_Pool::~Thread_Pool() {
    for (std::thread &worker : this->thread_arr) {
        if (worker.joinable()) {
            worker.join();
        }
    }
}
void
Thread_Pool
::
set_num_threads
(
size_t
n
)
{
this
->
num_threads
=
n
;
}
void
Thread_Pool
::
set_function
(
void
*
(
*
f
)(
void
*
))
{
this
->
__function
=
f
;
}
void
Thread_Pool
::
set_function
(
void
(
*
f
)
(
void
))
{
this
->
_funtion
=
f
;
}
// Sets the pointer handed to the worker function on every thread.
void Thread_Pool::set_args(void *args) {
    // the parameter shadows the member, so qualify the left-hand side
    this->args = args;
}
// Launches num_threads threads, each running __function(args).
// Does nothing when no worker function has been set, because starting a
// thread on a NULL function pointer would crash as soon as it runs.
void Thread_Pool::start_threads() {
    if (this->__function == NULL) {
        return;
    }
    this->thread_arr.reserve(this->thread_arr.size() + this->num_threads);
    for (size_t i = 0; i < this->num_threads; i++) {
        this->thread_arr.emplace_back(this->__function, this->args);
    }
}
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/thread_pool.hpp
0 → 100644
View file @
d2d47b86
#ifndef __THREAD_POOL_H__
#define __THREAD_POOL_H__
#include <vector>
#include <string>
#include <infiniband/verbs.h>
#include <pthread.h>
#include <thread>
#include "threadsafe_queue.hpp"
// A fixed-size group of std::thread workers that all run the same function
// with the same argument. Threads are created by start_threads() and joined
// by the destructor.
class Thread_Pool {
protected:
    // Number of threads start_threads() launches.
    size_t num_threads;
    // Worker entry point run by every thread. Defaults to null so that a
    // constructor which does not set it leaves a well-defined value.
    void *(*__function)(void *) = nullptr;
    // Alternative no-argument entry point; only written by set_function().
    void (*_funtion)(void) = nullptr;
    // Argument passed to __function on every thread.
    void *args = nullptr;
    // The running threads, in launch order.
    std::vector<std::thread> thread_arr;
    // Job queue associated with the pool: supplied by the caller, or
    // allocated by the size-only constructor.
    Thread_Safe_Queue *job_queue = nullptr;

public:
    Thread_Pool(size_t n);
    Thread_Pool(Thread_Safe_Queue *q);
    Thread_Pool(Thread_Safe_Queue *q, size_t n);
    Thread_Pool(Thread_Safe_Queue *q, size_t n, void *(*f)(void *));
    Thread_Pool(Thread_Safe_Queue *q, size_t n, void *(*f)(void *), void *args);
    ~Thread_Pool();

    void set_num_threads(size_t n);
    void set_function(void *(*f)(void *));
    void set_function(void (*f)(void));
    void set_args(void *args);
    void start_threads();
};
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/threadsafe_queue.cc
0 → 100644
View file @
d2d47b86
#ifndef __THREADSAFE_QUEUE_C__
#define __THREADSAFE_QUEUE_C__
#include <queue>
#include <pthread.h>
#include "queue_context.hpp"
#include "threadsafe_queue.hpp"
// Creates an empty queue and initialises its mutex with default attributes.
Thread_Safe_Queue::Thread_Safe_Queue() {
    queue_size = 0;
    pthread_mutex_init(&queue_lock, NULL);
}
// Creates a queue pre-filled with the first `n` entries of `jobs`, kept in
// array order. The default constructor runs first to set up the mutex.
Thread_Safe_Queue::Thread_Safe_Queue(size_t n, job_context *jobs[]) : Thread_Safe_Queue() {
    queue_size = n;
    size_t next = 0;
    while (next < n) {
        q.push(jobs[next]);
        ++next;
    }
}
// Deletes every job still queued, then destroys the mutex.
// The mutex is no longer unlocked here: the destructor never locked it, and
// unlocking a mutex the calling thread does not hold is undefined for a
// default mutex. Destruction must only happen once no other thread uses the
// queue.
Thread_Safe_Queue::~Thread_Safe_Queue() {
    while (!this->q.empty()) {
        job_context *t = this->q.front();
        this->q.pop();
        delete (t);
    }
    pthread_mutex_destroy(&(this->queue_lock));
}
// Appends `job` to the back of the queue under the lock and refreshes the
// cached size.
void Thread_Safe_Queue::enqueue(job_context *job) {
    pthread_mutex_t *lock = &queue_lock;
    pthread_mutex_lock(lock);
    q.push(job);
    queue_size = q.size();
    pthread_mutex_unlock(lock);
}
// Builds a job for a TCP connection and appends it to the queue.
// The job is allocated before the lock is taken, so an allocation failure
// cannot leave the mutex locked and the lock is held only for the push.
void Thread_Safe_Queue::enqueue(TCP_Transport *transport, uint8_t opcode) {
    job_context *job = new job_context(transport, opcode);
    pthread_mutex_lock(&(this->queue_lock));
    this->q.push(job);
    this->queue_size = this->q.size();
    pthread_mutex_unlock(&(this->queue_lock));
}
// Builds a job for an RDMA connection and appends it to the queue.
// The job is allocated before the lock is taken, so an allocation failure
// cannot leave the mutex locked and the lock is held only for the push.
void Thread_Safe_Queue::enqueue(RDMA_Transport *transport, uint8_t opcode) {
    job_context *job = new job_context(transport, opcode);
    pthread_mutex_lock(&(this->queue_lock));
    this->q.push(job);
    this->queue_size = this->q.size();
    pthread_mutex_unlock(&(this->queue_lock));
}
// Removes the front entry without returning or deleting it.
// Calling this on an empty queue is now a no-op; std::queue::pop() on an
// empty container is undefined behaviour.
void Thread_Safe_Queue::dequeue() {
    pthread_mutex_lock(&(this->queue_lock));
    if (!this->q.empty()) {
        this->q.pop();
    }
    this->queue_size = this->q.size();
    pthread_mutex_unlock(&(this->queue_lock));
}
// Returns the front job without removing it, or NULL when the queue is
// empty. The read happens under the lock so it cannot race with a
// concurrent push or pop. The caller must not already hold queue_lock.
job_context *Thread_Safe_Queue::front() {
    job_context *head = NULL;
    pthread_mutex_lock(&(this->queue_lock));
    if (!this->q.empty()) {
        head = this->q.front();
    }
    pthread_mutex_unlock(&(this->queue_lock));
    return head;
}
// Removes and returns the next job, or NULL when none is available.
// NULL entries are discarded instead of being left at the front, where they
// made every later call return NULL and blocked the jobs behind them.
job_context *Thread_Safe_Queue::get_job() {
    job_context *t = NULL;
    pthread_mutex_lock(&(this->queue_lock));
    while (t == NULL && !this->q.empty()) {
        t = this->q.front();
        this->q.pop();
    }
    this->queue_size = this->q.size();
    pthread_mutex_unlock(&(this->queue_lock));
    return t;
}
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/include/threadsafe_queue.hpp
0 → 100644
View file @
d2d47b86
#ifndef __THREADSAFE_QUEUE_H__
#define __THREADSAFE_QUEUE_H__
#include <queue>
#include <pthread.h>
#include <infiniband/verbs.h>
#include "queue_context.hpp"
#include "../transport_api/transport_config.hpp"
// FIFO of job_context pointers guarded by a pthread mutex.
// The queue owns the jobs it still holds: its destructor deletes them and
// destroys the mutex. Copying is therefore disabled, since a copy would
// delete the same jobs twice and duplicate the mutex object.
class Thread_Safe_Queue {
public:
    // Underlying container. Direct access bypasses queue_lock.
    std::queue<struct job_context *> q;
    // Guards q and queue_size inside the member functions.
    pthread_mutex_t queue_lock;
    // Cached element count, refreshed by the mutating member functions.
    size_t queue_size;

    Thread_Safe_Queue();
    Thread_Safe_Queue(size_t n, job_context *jobs[]);
    ~Thread_Safe_Queue();

    Thread_Safe_Queue(const Thread_Safe_Queue &) = delete;
    Thread_Safe_Queue &operator=(const Thread_Safe_Queue &) = delete;

    // Append an existing job, or build one for the given connection.
    void enqueue(job_context *job);
    void enqueue(TCP_Transport *transport, uint8_t opcode);
    void enqueue(RDMA_Transport *transport, uint8_t opcode);

    // Drop the front entry without returning it.
    void dequeue();
    // Peek at the front entry without removing it.
    job_context *front();
    // Remove and return the front entry; NULL when nothing is available.
    job_context *get_job();
};
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/integrated_client.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <stdio.h>
#include <string>
#include <vector>
#include "include/cli_api.hpp"
#include "include/common.hpp"
#include "include/client_functions.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using
namespace
std
;
// Addresses the client connects to; main() opens one connection per entry.
vector
<
string
>
conn_addrs
=
{
machine_allocation_ips
[
"ub-04-nic"
]
// the NIC
};
// Default port; main() overwrites it with the value from the config file.
int
conn_port
=
8888
;
// Identifies this executable as the client entity.
enum
Entity
self_id
=
CLIENT
;
/*
 * Client entry point.
 *
 * Reads config/client_config.conf, connects to every address in conn_addrs
 * and then either
 *   - interactive mode: reads up to 10 commands from stdin, tokenizes each
 *     and hands the tokens to the dispatcher, stopping early at end of
 *     input; or
 *   - benchmark mode: issues max_req write_rpc calls with a fixed 64-byte
 *     key and a value of up to 1024 bytes.
 * When `analyze` is set, the averages of the collected timing samples are
 * printed at the end.
 *
 * Returns 0 on normal completion, 1 if the command buffer cannot be
 * allocated.
 */
int main() {
    char *ipstr = (char *)malloc(max_ip_cmd_len);
    if (ipstr == NULL) {
        perror("Unable to allocate command buffer");
        return 1;
    }
    vector<string> tokens;

    Params parameters("config/client_config.conf");
    parameters.read_params();
    debug = parameters.debug;
    analyze = parameters.analyze;
    max_packet_size_bytes = parameters.max_packet_size_bytes;
    max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
    conn_port = parameters.conn_port;
    if (debug) {
        parameters.print_vals();
    }

    Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
    for (string conn_addr : conn_addrs) {
        // Connect to the address being iterated, not always the first one.
        dispatcher.add_connection(conn_addr, conn_port, parameters);
        if (debug) {
            dispatcher.conn_pool->rdma_connection_pool.back()->check_rdma_onesided();
        }
    }
    if (debug) {
        printf("Connected\n");
    }

    if (parameters.interactive_mode) {
        for (int i = 0; i < 10; i++) {
            memset(ipstr, 0, max_ip_cmd_len);
            if (fgets(ipstr, max_ip_cmd_len, stdin) == NULL) {
                break;  // end of input or read error: nothing more to run
            }
            if (debug) {
                printf("Entered Command: %s\n", ipstr);
            }
            tokens = tokenize(string(ipstr));
            if (debug) {
                printf("Extracted tokens are:\n");
                // Assume its cmd arg1 arg2 form for now, but only index the
                // three tokens when they are all there.
                if (tokens.size() >= 3) {
                    printf("%s\t\t%s\t\t%s\n", tokens[0].c_str(), tokens[1].c_str(), tokens[2].c_str());
                }
                for (size_t k = 0; k < tokens.size(); k++) {
                    printf("%s\t", tokens[k].c_str());
                }
                printf("\n");
            }
            dispatcher.issue_cmd(tokens);
        }
    } else {
        string s;
        for (int i = 0; i < parameters.rdma_mr_size_bytes - 150; i++) {
            s.push_back((char)(i % 256));
        }
        // Never announce more value bytes than `s` actually holds.
        const int value_len = s.size() < 1024 ? (int)s.size() : 1024;
        for (int i = 0; i < max_req; i++) {
            write_rpc(dispatcher.conn_pool->rdma_connection_pool[0],
                      "asddfgdsdhaskjdhaskjdhsakjdhskjdhaskjhdaskjdashdkjashdkasjhdhhhh", 64,
                      s.c_str(), value_len);
            printf("Completed request no. %d\n", i);
        }
    }

    if (analyze) {
        for (chrono::duration<double> d : request_queue_wait_time) {
            request_queue_wait_time_sum += d.count();
        }
        for (chrono::duration<double> d : response_buffer_wait_time) {
            response_buffer_wait_time_sum += d.count();
        }
        for (chrono::duration<double> d : send_queue_wait_time) {
            send_queue_wait_time_sum += d.count();
        }
        for (chrono::duration<double> d : client_rtt_time) {
            client_rtt_time_sum += d.count();
        }
        for (chrono::duration<double> d : rdma_one_sided_read_time) {
            rdma_one_sided_read_time_sum += d.count();
        }
        for (chrono::duration<double> d : rdma_one_sided_write_time) {
            rdma_one_sided_write_time_sum += d.count();
        }
        cout << "Request Queue Wait Time Avg: " << request_queue_wait_time_sum / request_queue_wait_time.size() << " seconds" << endl;
        cout << "Response Buffer Wait Time Avg: " << response_buffer_wait_time_sum / response_buffer_wait_time.size() << " seconds" << endl;
        cout << "Send Queue Wait Time Avg: " << send_queue_wait_time_sum / send_queue_wait_time.size() << " seconds" << endl;
        cout << "Avg RDMA one sided read time: " << rdma_one_sided_read_time_sum / rdma_one_sided_read_time.size() << " seconds" << endl;
        cout << "Avg RDMA one sided write time: " << rdma_one_sided_write_time_sum / rdma_one_sided_write_time.size() << " seconds" << endl;
        cout << "Client RTT Avg: " << client_rtt_time_sum / client_rtt_time.size() << " seconds" << endl;
    }

    free(ipstr);
    return 0;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/integrated_job_clientTest.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <string>
#include <vector>
#include <unistd.h>
#include "include/common.hpp"
#include "include/client_functions.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using
namespace
std
;
// Process-wide settings. main() in this file overwrites debug,
// max_packet_size_bytes, max_cq_poll_timeout and conn_port with the values
// read from the configuration file.
bool
debug
=
false
;
int
max_packet_size_bytes
=
0
;
chrono
::
duration
<
double
>
max_cq_poll_timeout
(
0.0
);
// Job queue allocated at static-initialisation time.
Thread_Safe_Queue
*
job_queue
=
new
Thread_Safe_Queue
();
int
conn_port
=
8888
;
// Sample payload and the same text reversed; main() in this file does not
// reference either.
const
char
*
t
=
"Hello from sender!!!"
;
const
char
*
tt
=
"!!!rednes morf olleH"
;
// Test driver: loads config/config.conf, opens one connection through a
// Connection_Pool and issues a single write request with key "123@"
// (4 bytes) and value "hello" (5 bytes) on the first RDMA connection.
int main() {
    Params parameters("config/config.conf");
    parameters.read_params();
    parameters.print_vals();

    // Publish the parsed settings through the file-level globals.
    conn_port = parameters.conn_port;
    max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
    max_packet_size_bytes = parameters.max_packet_size_bytes;
    debug = parameters.debug;

    Connection_Pool conn_pool(parameters.transport_type);
    conn_pool.make_connection(string(), parameters.conn_port, parameters);

    write_request(conn_pool.rdma_connection_pool.front(), "123@", 4, "hello", 5);
    return 0;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/integrated_job_serverTest.cc
0 → 100644
View file @
d2d47b86
#include <chrono>
#include <iostream>
#include <string>
#include <vector>
#include <unistd.h>
#include "include/common.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using
namespace
std
;
// Process-wide settings. main() in this file overwrites debug,
// max_packet_size_bytes, max_cq_poll_timeout and conn_port with the values
// read from the configuration file.
bool
debug
=
false
;
int
max_packet_size_bytes
=
0
;
chrono
::
duration
<
double
>
max_cq_poll_timeout
(
0.0
);
// Job queue allocated at static-initialisation time; main() drains it.
Thread_Safe_Queue
*
job_queue
=
new
Thread_Safe_Queue
();
// Peers main() connects to, in this order.
vector
<
string
>
conn_addrs
=
{
"192.168.200.20"
,
"192.168.200.30"
,
"192.168.210.40"
,
// the entry above (192.168.210.40) is the client
"192.168.200.50"
};
string
client_addr
(
"192.168.210.40"
);
int
conn_port
=
8888
;
// Key/value containers declared for the log; nothing in this file fills
// them yet (populate_map below is an empty stub).
unordered_map
<
char
*
,
char
*>
log_map
;
vector
<
pair
<
char
*
,
char
*>>
log
;
// TODO: not implemented yet -- the body is empty.
// Intended behaviour: generate num_entries key/value pairs whose keys obey
// the hash function in such a way that hashing each key leads to this
// particular server's id.
void
populate_map
(
size_t
key_size
,
size_t
value_size
,
int
num_entries
)
{
}
/*
 * Server-side test driver.
 *
 * Loads config/config.conf, connects to every address in conn_addrs and
 * then loops forever: fetch jobs from the dispatcher, assign them, report
 * the state of job_queue and print the payload that follows the
 * Common_Request header of every queued job.
 *
 * NOTE(review): jobs taken from the queue are not deleted here, as in the
 * original; confirm who owns a job_context after get_job().
 */
int main() {
    Params parameters("config/config.conf");
    parameters.read_params();
    parameters.print_vals();
    debug = parameters.debug;
    max_packet_size_bytes = parameters.max_packet_size_bytes;
    max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
    conn_port = parameters.conn_port;

    Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
    for (string addr : conn_addrs) {
        dispatcher.add_connection(addr, conn_port, parameters);
        cout << "Connected to: " << addr << ":" << conn_port << endl;
    }

    while (true) {
        vector<job_context *> jrs = dispatcher.get_jobs();
        cout << "Got: " << jrs.size() << " jobs" << endl;
        dispatcher.assign_jobs(jrs);
        cout << "Current Status of Job Queue: " << job_queue->q.size() << " jobs, unattended" << endl;

        // front() on an empty std::queue is undefined, so only report the
        // age of the head job when there is one.
        if (!job_queue->q.empty()) {
            cout << "The latest job posted at: "
                 << (chrono::steady_clock::now() - job_queue->q.front()->job_post_time).count() << endl;
        }

        while (!job_queue->q.empty()) {
            // get_job() already removes the job it returns. The extra
            // dequeue() that used to follow dropped every second job and
            // popped an empty queue when only one job was left.
            job_context *j = job_queue->get_job();
            if (j == NULL) {
                break;
            }
            char *jstart = (char *)j->rdma_transport->get_mr_addr();
            char *kvstart = jstart + sizeof(Common_Request);
            cout << kvstart << endl;
        }
    }
    return 0;
}
Smit_MTP_RamCloud_Replication_Offload/integrated_nic.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <string>
#include <vector>
#include "include/common.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using
namespace
std
;
// Peers the NIC expects. main() accepts one incoming connection per entry
// on the common TCP socket; the address itself is only used in the debug
// message.
vector
<
string
>
conn_addrs
=
{
//machine_allocation_ips["ub-05"],
machine_allocation_ips
[
"ub-04"
],
machine_allocation_ips
[
"ub-08"
]
// the entry above ("ub-08") is the client
};
string
client_addr
(
machine_allocation_ips
[
"ub-08"
]);
// Default port; main() overwrites it with the value from the config file.
int
conn_port
=
8888
;
// Identifies this executable as the NIC entity.
enum
Entity
self_id
=
NIC
;
/*
 * NIC entry point.
 *
 * Reads config/nic_config.conf, listens on the common TCP socket and
 * accepts one connection per entry of conn_addrs. It then alternates
 * between assigning incoming jobs and servicing the send queue until
 * max_req requests have been counted. When `analyze` is set, throughput and
 * the averages of the collected timing samples are printed.
 *
 * Returns 0 on normal completion, 1 if a connection cannot be added.
 */
int main() {
    auto start_time = chrono::steady_clock::now();
    auto end_time = chrono::steady_clock::now();
    chrono::duration<double> total_time;

    Params parameters("config/nic_config.conf");
    parameters.read_params();
    debug = parameters.debug;
    analyze = parameters.analyze;
    max_packet_size_bytes = parameters.max_packet_size_bytes;
    max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
    conn_port = parameters.conn_port;
    if (debug) {
        parameters.print_vals();
    }

    Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
    dispatcher.setup_common_tcp_conn(conn_port);
    dispatcher.common_socket_start_listen();
    for (string addr : conn_addrs) {
        if (dispatcher.add_conn_on_common_tcp(parameters)) {
            perror("Unable to add connection");
            return 1;
        }
        if (debug) {
            printf("Connected to %s:%d\n", addr.c_str(), conn_port);
            dispatcher.conn_pool->rdma_connection_pool.back()->check_rdma_onesided();
        }
    }

    if (analyze) {
        start_time = chrono::steady_clock::now();
    }
    while (1) {
        dispatcher.assign_jobs(dispatcher.get_jobs());
        dispatcher.service_send_queue();
        // ">=" rather than "==": if the counter advances by more than one
        // between two checks it would step over max_req and never stop.
        if (req_cnt >= max_req) {
            break;
        }
    }
    if (analyze) {
        end_time = chrono::steady_clock::now();

        for (chrono::duration<double> d : request_queue_wait_time) {
            request_queue_wait_time_sum += d.count();
        }
        for (chrono::duration<double> d : response_buffer_wait_time) {
            response_buffer_wait_time_sum += d.count();
        }
        for (chrono::duration<double> d : send_queue_wait_time) {
            send_queue_wait_time_sum += d.count();
        }
        for (chrono::duration<double> d : master_backup_ack_time) {
            master_backup_ack_time_sum += d.count();
        }
        for (chrono::duration<double> d : rdma_one_sided_read_time) {
            rdma_one_sided_read_time_sum += d.count();
        }
        for (chrono::duration<double> d : rdma_one_sided_write_time) {
            rdma_one_sided_write_time_sum += d.count();
        }
        total_time = end_time - start_time;
        cout << "Throughput: " << req_cnt / total_time.count() << " requests/second" << endl;
        cout << "Request Queue Wait Time Avg: " << request_queue_wait_time_sum / request_queue_wait_time.size() << " seconds" << endl;
        cout << "Response Buffer Wait Time Avg: " << response_buffer_wait_time_sum / response_buffer_wait_time.size() << " seconds" << endl;
        cout << "Send Queue Wait Time Avg: " << send_queue_wait_time_sum / send_queue_wait_time.size() << " seconds" << endl;
        cout << "Avg Time for Nic to get all acks from servers: " << master_backup_ack_time_sum / master_backup_ack_time.size() << " seconds" << endl;
        cout << "Avg RDMA one sided read time: " << rdma_one_sided_read_time_sum / rdma_one_sided_read_time.size() << " seconds" << endl;
        cout << "Avg RDMA one sided write time: " << rdma_one_sided_write_time_sum / rdma_one_sided_write_time.size() << " seconds" << endl;
    }
    return 0;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/integrated_receiverTest.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <string>
#include <vector>
#include <unistd.h>
#include "include/common.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using
namespace
std
;
// Process-wide settings. main() in this file overwrites debug,
// max_packet_size_bytes, max_cq_poll_timeout and conn_port with the values
// read from the configuration file.
bool
debug
=
false
;
int
max_packet_size_bytes
=
0
;
chrono
::
duration
<
double
>
max_cq_poll_timeout
(
0.0
);
// Job queue allocated at static-initialisation time.
Thread_Safe_Queue
*
job_queue
=
new
Thread_Safe_Queue
();
// Known peers. main() in this file connects to client_addr only and does
// not iterate this list.
vector
<
string
>
conn_addrs
=
{
"192.168.200.20"
,
//"192.168.200.30",
"192.168.210.40"
,
// the entry above (192.168.210.40) is the client
//"192.168.200.50"
};
string
client_addr
(
"192.168.210.40"
);
int
conn_port
=
8888
;
int
main
()
{
Params
parameters
(
"config/config.conf"
);
parameters
.
read_params
();
parameters
.
print_vals
();
debug
=
parameters
.
debug
;
max_packet_size_bytes
=
parameters
.
max_packet_size_bytes
;
max_cq_poll_timeout
=
parameters
.
rdma_cq_poll_timeout_ms
;
conn_port
=
parameters
.
conn_port
;
Dispatcher
dispatcher
(
parameters
.
transport_type
,
parameters
.
num_threads
);
dispatcher
.
add_connection
(
client_addr
,
conn_port
,
parameters
);
cout
<<
"Connected to: "
<<
client_addr
<<
":"
<<
conn_port
<<
endl
;
char
*
testbuf
=
(
char
*
)
malloc
(
512
);
memset
(
testbuf
,
0
,
512
);
vector
<
RDMA_Transport
*>
fds
=
dispatcher
.
conn_pool
->
get_rdma_request_conns
();
TCP_Transport
*
tcp_transport
=
fds
.
front
()
->
get_tcp_conn
();
cout
<<
"Got write from: "
<<
tcp_transport
->
get_ip
()
<<
endl
;
tcp_transport
->
recv_data
(
testbuf
,
21
);
cout
<<
"Got: "
<<
testbuf
<<
endl
;
memset
(
testbuf
,
0
,
512
);
fds
=
dispatcher
.
conn_pool
->
get_rdma_request_conns
();
tcp_transport
=
fds
.
front
()
->
get_tcp_conn
();
cout
<<
"Got write from: "
<<
tcp_transport
->
get_ip
()
<<
endl
;
tcp_transport
->
recv_data
(
testbuf
,
21
);
cout
<<
"Got: "
<<
testbuf
<<
endl
;
return
0
;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/integrated_senderTest.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <string>
#include <vector>
#include <unistd.h>
#include "include/common.hpp"
#include "include/client_functions.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using namespace std;

// Runtime settings. The defaults below are overwritten in main() with the
// values read from config/config.conf.
bool debug = false;
int max_packet_size_bytes = 0;
chrono::duration<double> max_cq_poll_timeout(0.0);

// Not referenced anywhere else in this file.
// NOTE(review): presumably consumed by the included headers -- confirm.
Thread_Safe_Queue *job_queue = new Thread_Safe_Queue();

// Default port; replaced by parameters.conn_port in main().
int conn_port = 8888;

// Test payloads: 20 characters each, i.e. 21 bytes including the
// terminating NUL, which is the length main() passes to send_data().
const char *t = "Hello from sender!!!";
const char *tt = "!!!rednes morf olleH";
int
main
()
{
Params
parameters
(
"config/config.conf"
);
parameters
.
read_params
();
parameters
.
print_vals
();
debug
=
parameters
.
debug
;
max_packet_size_bytes
=
parameters
.
max_packet_size_bytes
;
max_cq_poll_timeout
=
parameters
.
rdma_cq_poll_timeout_ms
;
conn_port
=
parameters
.
conn_port
;
Connection_Pool
conn_pool
(
parameters
.
transport_type
);
conn_pool
.
make_connection
(
string
(),
parameters
.
conn_port
,
parameters
);
TCP_Transport
*
tcp_transport
=
conn_pool
.
rdma_connection_pool
[
0
]
->
get_tcp_conn
();
tcp_transport
->
send_data
(
t
,
21
);
tcp_transport
->
send_data
(
tt
,
21
);
//conn_pool.rdma_connection_pool[0]->copy_to_mr(t);
//conn_pool.rdma_connection_pool[0]->one_sided_write();
//conn_pool.rdma_connection_pool[0]->poll_cq();
//write_rpc(conn_pool.rdma_connection_pool[0], "123", 3, "hello", 5);
return
0
;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/integrated_server.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <string>
#include <vector>
#include <unordered_map>
#include "include/common.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/hash.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
// `using namespace std;` is deliberately not used here: it causes namespace
// ambiguity conflicts with names in the included header files.
// The alias below lets the rest of the file write chrono::duration.
namespace chrono = std::chrono;

// Addresses this server connects to at start-up. machine_allocation_ips is
// not declared in this file.
std::vector<std::string> conn_addrs = {
    machine_allocation_ips["ub-04-nic"] // only the one NIC for now
};

// Default port; replaced by parameters.conn_port in main().
int conn_port = 8888;

// Not referenced in main().
// NOTE(review): presumably read by the included headers to identify this
// process's role -- confirm.
enum Entity self_id = SERVER;
/*
 * Adds every sample in `samples` (converted to seconds) to `sum`, then
 * prints `label` followed by the mean. `sum` is taken by reference so the
 * caller's accumulator is updated exactly as in an inline loop.
 * When there are no samples the mean is undefined, so "n/a" is printed
 * instead of dividing by zero.
 */
template <typename Samples, typename Sum>
static void report_average_seconds(const char *label, Samples &samples, Sum &sum)
{
    for (std::chrono::duration<double> sample : samples) {
        sum += sample.count();
    }
    if (samples.size() == 0) {
        std::cout << label << "n/a (no samples)" << std::endl;
        return;
    }
    std::cout << label << sum / samples.size() << " seconds" << std::endl;
}

/*
 * Entry point of the integrated server.
 *
 * Loads config/server_config.conf into the shared globals, connects to
 * every address in conn_addrs, then dispatches jobs and services the send
 * queue until req_cnt equals max_req. When `analyze` is set, the average of
 * each collected timing series is printed before exiting.
 *
 * debug, analyze, req_cnt, max_req and the *_time / *_time_sum series are
 * not declared in this file.
 */
int main()
{
    // setup configuration from a config file
    Params parameters("config/server_config.conf");
    parameters.read_params();

    debug = parameters.debug;
    analyze = parameters.analyze;
    max_packet_size_bytes = parameters.max_packet_size_bytes;
    max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
    conn_port = parameters.conn_port;

    if (debug) {
        parameters.print_vals();
    }

    Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);

    // setup connections
    if (debug) {
        printf("---Setting up connections---\n");
    }
    for (std::string conn_addr : conn_addrs) {
        // Connect to the address being iterated. The previous code always
        // used conn_addrs[0], which is only equivalent while the list has a
        // single entry.
        dispatcher.add_connection(conn_addr, conn_port, parameters);
        if (debug) {
            // %s needs a C string; passing a std::string object is undefined
            // behaviour.
            printf("Connected to %s:%d\n", conn_addr.c_str(), conn_port);
            dispatcher.conn_pool->rdma_connection_pool.back()->check_rdma_onesided();
        }
    }

    // Serve until the configured number of requests has been handled.
    while (1) {
        dispatcher.assign_jobs(dispatcher.get_jobs());
        dispatcher.service_send_queue();
        if (req_cnt == max_req) {
            break;
        }
    }

    if (analyze) {
        report_average_seconds("Request Queue Wait Time Avg: ",
                               request_queue_wait_time, request_queue_wait_time_sum);
        report_average_seconds("Send Queue Wait Time Avg: ",
                               send_queue_wait_time, send_queue_wait_time_sum);
        report_average_seconds("Write service time Avg: ",
                               worker_write_service_time, worker_write_service_time_sum);
        report_average_seconds("Append log time Avg: ",
                               append_log_time, append_log_time_sum);
        report_average_seconds("Avg RDMA one sided read time: ",
                               rdma_one_sided_read_time, rdma_one_sided_read_time_sum);
        report_average_seconds("Avg RDMA one sided write time: ",
                               rdma_one_sided_write_time, rdma_one_sided_write_time_sum);
    }
    return 0;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/metadata.hpp
0 → 100644
View file @
d2d47b86
#ifndef __METADATA__
#define __METADATA__
#include <stdlib.h>
#include <unordered_map>
// Not referenced in the code visible alongside this header.
// NOTE(review): presumably the maximum key / value lengths in bytes -- confirm.
const uint key_len = 128;
const uint val_len = 256;

// NOTE(review): the Status enum below defines 34 values (0..33) and sets
// STATUS_MAX_VALUE to 33; confirm whether this is meant to be a count (34)
// or the largest value (33).
const short int num_status = 33;

// Size in bytes of the `ph` array in `union object` below (64 KiB).
// The commented-out value is an earlier setting.
const int object_size = (1 << 16); //128;

// Number of object_metadata entries created by populate_cache_meta() and
// the number of requests the client/server test loops run.
const int cache_meta_size = 1024;
typedef
enum
Status
{
/// Default return value when an operation was successful.
STATUS_OK
=
0
,
/// Indicates that the server does not know about (and is not responsible
/// for) a given tablet, but that it may exist elsewhere in the system.
/// When it's possible that the tablet exists on another server, this
/// status should be returned (in preference to the definitive
/// TABLE_DOESNT_EXIST).
STATUS_UNKNOWN_TABLET
=
1
,
/// Indicates that a table does not exist anywhere in the system. At present
/// only the coordinator can say with certainly that a table does not exist.
STATUS_TABLE_DOESNT_EXIST
=
2
,
/// Indicates that an object does not exist anywhere in the system. Note
/// that unlike with tables there is no UNKNOWN_OBJECT status. This is just
/// because servers will reject operations on objects in unknown tables with
/// a table-related status. If they own a particular tablet, then they can
/// say with certainty if an object exists there or not.
STATUS_OBJECT_DOESNT_EXIST
=
3
,
STATUS_OBJECT_EXISTS
=
4
,
STATUS_WRONG_VERSION
=
5
,
STATUS_NO_TABLE_SPACE
=
6
,
STATUS_MESSAGE_TOO_SHORT
=
7
,
STATUS_UNIMPLEMENTED_REQUEST
=
8
,
STATUS_REQUEST_FORMAT_ERROR
=
9
,
STATUS_RESPONSE_FORMAT_ERROR
=
10
,
STATUS_COULDNT_CONNECT
=
11
,
STATUS_BACKUP_BAD_SEGMENT_ID
=
12
,
/// Returned by backups when they cannot (or do not wish to) allocate
/// space for a segment replica.
STATUS_BACKUP_OPEN_REJECTED
=
13
,
STATUS_BACKUP_SEGMENT_OVERFLOW
=
14
,
STATUS_BACKUP_MALFORMED_SEGMENT
=
15
,
STATUS_SEGMENT_RECOVERY_FAILED
=
16
,
/// Indicates that a server is not prepared to handle a request at
/// the present time; the caller should retry at a later time. This
/// status can be returned under many different situations, such as
/// (a) the server is out of resources to execute the request, or
/// (b) the server is not sure it actually has authority to execute
/// the request, and is checking with the coordinator.
STATUS_RETRY
=
17
,
/// Indicates that the RPC requested an unknown service.
STATUS_SERVICE_NOT_AVAILABLE
=
18
,
STATUS_TIMEOUT
=
19
,
/// Indicates that server to which an RPC is directed either never existed,
/// has come and gone, or is currently in crashed state. The server is not
/// in a position to respond to RPCs and probably never will be again
/// (unless the id hasn't yet existed; once a server crashes its id will
/// never be reused).
STATUS_SERVER_NOT_UP
=
20
,
STATUS_INTERNAL_ERROR
=
21
,
/// Indicates that the object chosen for an operation does not match the
/// associated requirements. Therefore the chosen object is invalid.
STATUS_INVALID_OBJECT
=
22
,
/// Indicates that a tablet does not exist. This status is of relevance
/// when doing split or merge operations on tablets are executed.
STATUS_TABLET_DOESNT_EXIST
=
23
,
/// Indicates that the logic to partition tablets was invoked without a
/// preceeding invocation to start reading replicas off of disk.
STATUS_PARTITION_BEFORE_READ
=
24
,
/// Indicates that an RPC was intended for a particular server id, but
/// was actually sent to a different server id.
STATUS_WRONG_SERVER
=
25
,
/// Indicates that the server sending an RPC is not present in the
/// server list of the RPC recipient. Used to help servers discover
/// that they are zombies (the rest of the cluster thinks a zombie
/// is dead, but the zombie thinks it's still alive), so they don't
/// continue servicing requests when other servers have already
/// taken over their tablets. See "Zombies" in designNotes.
STATUS_CALLER_NOT_IN_CLUSTER
=
26
,
/// Indicates that a single request was too big to fit in an rpc and
/// thus could not be sent/carried out.
STATUS_REQUEST_TOO_LARGE
=
27
,
/// Indicates that the server does not know about (and is not responsible
/// for) a given indexlet, but that it may exist elsewhere in the system.
/// When it's possible that the indexlet exists on another server, this
/// status should be returned (in preference to the definitive
/// INDEX_DOESNT_EXIST).
STATUS_UNKNOWN_INDEXLET
=
28
,
/// Indicates that an index does not exist anywhere in the system. At
/// present only the coordinator can say with certainly that an index does
/// not exist.
STATUS_INDEX_DOESNT_EXIST
=
29
,
/// Indicates that a parameter provided by the client is invalid (for
/// example: it is outside allowed bounds).
STATUS_INVALID_PARAMETER
=
30
,
/// Indicates that client already received the result of the rpc.
/// It does not make sense to execute the RPC again. Most likely cause
/// is a delayed network packet.
STATUS_STALE_RPC
=
31
,
/// Indicates that the lease of a client is expired on the coordinator.
/// Master refused to execute the RPC with expired lease.
STATUS_EXPIRED_LEASE
=
32
,
/// Indicates that a client tried to perform transaction operations after
/// the transaction commit had already started.
STATUS_TX_OP_AFTER_COMMIT
=
33
,
STATUS_MAX_VALUE
=
33
,
// Note: if you add a new status value you must make the following
// additional updates:
// * Modify STATUS_MAX_VALUE to have a value equal to the largest
// defined status value, and make sure its definition is the last one
// in the list. STATUS_MAX_VALUE is used primarily for testing.
// * Add new entries in the tables "messages" and "symbols" in Status.cc.
// * Add a new exception class to ClientException.h
// * Add a new "case" to ClientException::throwException to map from
// the status value to a status-specific ClientException subclass.
// * In the Java bindings, add a static class for the exception to
// ClientException.java
// * Add a case for the status of the exception to throw the exception in
// ClientException.java
// * Add the exception to the Status enum in Status.java, making
// sure the status is in the correct position corresponding to its status
// code.
}
Status
;
// Packed record of one 64-bit version plus four one-byte flags; the packed
// attribute removes padding, so the layout is 12 bytes.
// Not used by any code visible alongside this header.
// NOTE(review): the field meanings below are inferred from the names only --
// confirm against the code that fills this struct in.
struct RejectRules {
    uint64_t givenVersion;   // presumably the version the flags below compare against
    uint8_t doesntExist;     // presumably: reject if the object does not exist
    uint8_t exists;          // presumably: reject if the object exists
    uint8_t versionLeGiven;  // presumably: reject if version <= givenVersion
    uint8_t versionNeGiven;  // presumably: reject if version != givenVersion
} __attribute__((packed));
// Per-object bookkeeping entry stored in obj_table.
struct object_metadata {
    long long key;    // set to the table index by populate_cache_meta()
    bool valid;       // servers replicate a request only when this is true
    uint64_t version; // not read or written by the code visible here
    char *value_ptr;  // not read or written by the code visible here
};
// NIC mem
// Key -> metadata table. Declared here; each server program that includes
// this header provides the definition.
extern std::unordered_map<long long int, struct object_metadata> obj_table;
// Wire object exchanged between client and server. The `ph` member makes
// the union at least object_size bytes, so sizeof(union object) is the
// amount copied/sent per request regardless of how small `obj` is.
union object {
    char ph[object_size]; // NOTE(review): presumably "placeholder" padding -- confirm
    struct obj {
        Status status;
        long long key;         // looked up in obj_table by the servers
        char value[4];
        int version;
        unsigned long send_ts; // the client stores a millisecond timestamp here
    } obj;
};
#endif
Smit_MTP_RamCloud_Replication_Offload/nonoffloaded_client.cpp
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <sys/time.h>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
const
short
int
MODE
=
0
;
char
*
SERVER_HOST
=
"192.168.200.20"
;
//char *SERVER_NIC = "192.168.200.21";
//const int NIC_PORT = 8090;
const
int
SERVER_PORT
=
8090
;
const
double
err_fraction
=
0.5
;
int
main
(
int
argc
,
char
*
argv
[])
{
char
*
temp
=
(
char
*
)
malloc
(
128
);
struct
resource_base
*
base
;
base
=
(
struct
resource_base
*
)
malloc
(
sizeof
(
struct
resource_base
));
init_resources
(
base
);
base
->
server_name
=
SERVER_HOST
;
base
->
ib_port
=
IB_PORT
;
base
->
gid_idx
=
GID_IDX
;
base
->
port
=
SERVER_PORT
;
base
->
mr_buf_addr
=
(
char
*
)
malloc
(
base
->
mr_size
);
strcpy
(
base
->
mr_buf_addr
,
"Hi from client
\0
"
);
sock_connect
(
base
);
sync_remote_qp
(
base
,
base
->
mr_buf_addr
,
temp
,
15
);
union
object
test_obj
;
union
object
ret_obj
;
memset
(
&
test_obj
,
0
,
sizeof
(
test_obj
));
memset
(
&
ret_obj
,
0
,
sizeof
(
ret_obj
));
test_obj
.
obj
.
key
=
1
;
test_obj
.
obj
.
value
[
0
]
=
'T'
;
test_obj
.
obj
.
version
=
1
;
test_obj
.
obj
.
status
=
STATUS_OK
;
struct
timeval
temp_time
;
double
snd_ts
,
rcvd_ts
;
double
avg
=
0
;
int
err_cnt
=
0
;
int
succ_cnt
=
0
;
enum
Status
ret_status
;
for
(
int
i
=
0
;
i
<
cache_meta_size
;
i
++
)
{
test_obj
.
obj
.
key
=
i
;
gettimeofday
(
&
temp_time
,
NULL
);
snd_ts
=
((
double
)
temp_time
.
tv_sec
*
1000.0
)
+
((
double
)
temp_time
.
tv_usec
/
1000.0
);
test_obj
.
obj
.
send_ts
=
snd_ts
;
send_and_check
(
base
,
(
char
*
)
&
test_obj
,
(
char
*
)
&
ret_status
,
sizeof
(
test_obj
),
sizeof
(
ret_status
));
gettimeofday
(
&
temp_time
,
NULL
);
rcvd_ts
=
((
double
)
temp_time
.
tv_sec
*
1000.0
)
+
((
double
)
temp_time
.
tv_usec
/
1000.0
);
avg
+=
(
rcvd_ts
-
snd_ts
);
if
(
ret_status
==
STATUS_WRONG_VERSION
)
err_cnt
++
;
else
succ_cnt
++
;
}
std
::
cout
<<
"Errored requests: "
<<
err_cnt
<<
std
::
endl
;
std
::
cout
<<
"Successful requests: "
<<
succ_cnt
<<
std
::
endl
;
std
::
cout
<<
"Avg. RTT with 1 other replica: "
<<
avg
/
(
double
)
cache_meta_size
<<
" ms"
<<
std
::
endl
;
cleanup
(
base
);
return
0
;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/nonoffloaded_server.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <string>
#include <vector>
#include "include/common.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using namespace std;

// Peers expected to connect. main() accepts one connection per entry; the
// addresses themselves are only used in the debug message.
// machine_allocation_ips is not declared in this file.
vector<string> conn_addrs = {
    machine_allocation_ips["ub-05"],
    machine_allocation_ips["ub-08"], // the client
};

// Not referenced in main() of this file.
string client_addr(machine_allocation_ips["ub-08"]);

// Default port; replaced by parameters.conn_port in main().
int conn_port = 8888;

// Not referenced in main().
// NOTE(review): presumably read by the included headers -- confirm.
enum Entity self_id = SERVER;
int
main
()
{
Params
parameters
(
"config/master_server_config.conf"
);
parameters
.
read_params
();
debug
=
parameters
.
debug
;
analyze
=
parameters
.
analyze
;
max_packet_size_bytes
=
parameters
.
max_packet_size_bytes
;
max_cq_poll_timeout
=
parameters
.
rdma_cq_poll_timeout_ms
;
conn_port
=
parameters
.
conn_port
;
if
(
debug
)
{
parameters
.
print_vals
();
}
Dispatcher
dispatcher
(
parameters
.
transport_type
,
parameters
.
num_threads
);
dispatcher
.
setup_common_tcp_conn
(
conn_port
);
dispatcher
.
common_socket_start_listen
();
for
(
string
addr
:
conn_addrs
)
{
if
(
dispatcher
.
add_conn_on_common_tcp
(
parameters
))
{
perror
(
"Unable to add connection"
);
return
1
;
}
if
(
debug
)
{
printf
(
"Connected to %s:%d
\n
"
,
addr
.
c_str
(),
conn_port
);
}
if
(
debug
)
{
dispatcher
.
conn_pool
->
rdma_connection_pool
.
back
()
->
check_rdma_onesided
();
}
}
while
(
1
)
{
dispatcher
.
assign_jobs
(
dispatcher
.
get_jobs
());
if
(
debug
)
{
printf
(
"Current job queue size: %d
\n
"
,
(
int
)
job_queue
->
queue_size
);
}
dispatcher
.
service_send_queue
();
if
(
req_cnt
==
100
)
{
break
;
}
}
if
(
analyze
)
{
for
(
chrono
::
duration
<
double
>
d
:
request_queue_wait_time
)
{
request_queue_wait_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
response_buffer_wait_time
)
{
response_buffer_wait_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
send_queue_wait_time
)
{
send_queue_wait_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
master_backup_ack_time
)
{
master_backup_ack_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
rdma_one_sided_read_time
)
{
rdma_one_sided_read_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
rdma_one_sided_write_time
)
{
rdma_one_sided_write_time_sum
+=
d
.
count
();
}
cout
<<
"Request Queue Wait Time Avg: "
<<
request_queue_wait_time_sum
/
request_queue_wait_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Response Buffer Wait Time Avg: "
<<
response_buffer_wait_time_sum
/
response_buffer_wait_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Send Queue Wait Time Avg: "
<<
send_queue_wait_time_sum
/
send_queue_wait_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Avg Time for Master Server to get all acks from backup servers: "
<<
master_backup_ack_time_sum
/
master_backup_ack_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Avg RDMA one sided read time: "
<<
rdma_one_sided_read_time_sum
/
rdma_one_sided_read_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Avg RDMA one sided write time: "
<<
rdma_one_sided_write_time_sum
/
rdma_one_sided_write_time
.
size
()
<<
" seconds"
<<
endl
;
}
return
0
;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/nonoffloaded_server.cpp
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <sys/time.h>
#include <unordered_map>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
// Number of replica servers this process connects to; sizes SERVER_HOST and
// the per-server arrays in main().
const int num_servers = 1;

// Not referenced in this file's functions.
const short int MODE = 2;

// Replica addresses, one per server.
char *SERVER_HOST[num_servers] = {
    //"192.168.200.20",
    "192.168.200.50"
};
//char *SERVER_NIC = "192.168.200.21";

// Port used for the replica connections.
const int SERVER_PORT = 8080;
// Port used for the client connection.
const int CLIENT_PORT = 8090;

// Device index passed to open_dev().
const short int dev_num = 0;

// Threshold used by populate_cache_meta(): an entry is marked invalid when
// rand()/RAND_MAX <= err_fraction.
const double err_fraction = 0.5;

// Zero-initialised template entries copied into obj_table.
struct object_metadata obj_metas[cache_meta_size];

// Definition of the table declared extern in metadata.hpp.
std::unordered_map<long long int, struct object_metadata> obj_table;
void
populate_cache_meta
(
std
::
unordered_map
<
long
long
,
struct
object_metadata
>&
m
)
{
for
(
int
i
=
0
;
i
<
cache_meta_size
;
i
++
)
{
m
[
i
]
=
obj_metas
[
i
];
m
[
i
].
key
=
i
;
if
((
double
)
rand
()
/
RAND_MAX
<=
err_fraction
)
m
[
i
].
valid
=
false
;
else
m
[
i
].
valid
=
true
;
}
}
/*
 * Storage server of the non-offloaded baseline (socket client link).
 *
 * 1. Sets up one RDMA connection per replica in SERVER_HOST.
 * 2. Connects to the client over a socket (sock_connect).
 * 3. For cache_meta_size requests: reads one object from the client; if its
 *    key is marked valid in obj_table, copies the object into every
 *    replica's registered buffer, signals the replicas ("W"), collects
 *    their acks ("S") and replies STATUS_OK; otherwise replies
 *    STATUS_WRONG_VERSION.
 * 4. Prints the request counters and the average replication time, tells
 *    the replicas to stop ("X") and releases all resources.
 *
 * Returns 0 on completion, 1 if a start-up allocation fails.
 */
int main(int argc, char *argv[])
{
    srand(time(NULL));

    // Scratch buffer for the one-byte control exchanges with the replicas.
    char *temp = (char *)malloc(128);
    if (temp == NULL) {
        perror("malloc");
        return 1;
    }

    // Wall-clock time in milliseconds.
    auto now_ms = []() -> double {
        struct timeval tv;
        gettimeofday(&tv, NULL);
        return ((double)tv.tv_sec * 1000 + (double)tv.tv_usec / 1000);
    };

    // nic connects to servers
    struct resource_base *server_base[num_servers];
    struct ibv_port_attr server_port_attr[num_servers];
    union ibv_gid server_gid[num_servers];
    for (int i = 0; i < num_servers; i++) {
        struct resource_base *srv =
            (struct resource_base *)malloc(sizeof(struct resource_base));
        if (srv == NULL) {
            perror("malloc");
            return 1;
        }
        server_base[i] = srv;

        init_resources(srv);
        srv->mode = 1;
        srv->ib_port = 1;
        srv->gid_idx = 1;
        srv->server_name = SERVER_HOST[i];
        srv->port = SERVER_PORT;

        open_dev(srv, dev_num);
        allocate_pd(srv);
        register_mr(srv);
        init_cq(srv);
        init_qp(srv);

        if (ibv_query_port(srv->ctx, srv->ib_port, &server_port_attr[i]))
            D(err_msg("ibv_query_port", true, srv));
        if (server_port_attr[i].state != IBV_PORT_ACTIVE)
            D(err_msg("IB PORT NOT ACTIVE", true, srv));
        srv->port_attr = &server_port_attr[i];
        set_mtu(srv->port_attr, &(srv->mtu));

        if (ibv_query_gid(srv->ctx, srv->ib_port, srv->gid_idx, &server_gid[i]))
            D(err_msg("ibv_query_gid", true, srv));
        memcpy(srv->local_conn->gid, &server_gid[i], 16);

        connect_qp(srv);
        sync_remote_qp(srv, "T", temp, 1);
        if (poll_completion(srv))
            D(err_msg("IBV_WR_SEND", false, srv));
    }

    // server connects to client
    struct resource_base *client_base =
        (struct resource_base *)malloc(sizeof(struct resource_base));
    if (client_base == NULL) {
        perror("malloc");
        return 1;
    }
    init_resources(client_base);
    client_base->mode = 1;
    client_base->ib_port = 1;
    client_base->gid_idx = 1;
    client_base->port = CLIENT_PORT;
    client_base->mr_buf_addr = (char *)malloc(client_base->mr_size);
    if (client_base->mr_buf_addr == NULL) {
        perror("malloc");
        return 1;
    }
    sock_connect(client_base);
    sync_remote_qp(client_base, "Hi from client\0", client_base->mr_buf_addr, 15);
    // std::cout<<"CLIENT SENT"<<std::endl;
    // std::cout<<client_base->mr_buf_addr<<std::endl;
    // std::cout<<"NIC CONNECTED TO CLIENT"<<std::endl;

    int err_cnt = 0;
    int succ_cnt = 0;
    populate_cache_meta(obj_table);
    obj_table[0].valid = false; // key 0 is always rejected

    union object sent_obj;
    enum Status send_succ = STATUS_OK;
    enum Status send_err = STATUS_WRONG_VERSION;
    double avg_time_diff = 0.0;

    for (int i = 0; i < cache_meta_size; i++) {
        memset(&sent_obj, 0, sizeof(sent_obj));
        read_obj(client_base, (char *)&sent_obj, sizeof(sent_obj));

        // Look the key up without inserting: operator[] would add a new
        // entry for every unknown key the client sends. Unknown keys are
        // treated as invalid, which is also what a default-inserted entry
        // would yield.
        auto hit = obj_table.find(sent_obj.obj.key);
        const bool key_valid = (hit != obj_table.end()) && hit->second.valid;

        if (!key_valid) {
            send_obj(client_base, (char *)&send_err, sizeof(send_err));
            err_cnt++;
            continue;
        }

        // NOTE(review): assumes every replica buffer holds at least
        // sizeof(union object) bytes -- confirm against register_mr().
        for (resource_base *server : server_base)
            memcpy((void *)server->mr_buf_addr, (void *)&sent_obj, sizeof(sent_obj));

        const double s_time = now_ms();
        //sync_remote_qp(server_base, "W", temp, 1); //prepare
        //post_send(server_base, IBV_WR_RDMA_WRITE);
        // if(poll_completion(server_base))
        //     D(err_msg("IBV_WR_RDMA_WRITE", true, server_base));

        // Tell every replica a write is pending, then collect the acks.
        for (resource_base *server : server_base) {
            sync_remote_qp(server, "W", temp, 1);
        }
        int ack_cnt = 0;
        for (resource_base *server : server_base) {
            sync_remote_qp(server, "T", temp, 1); // ack
            if (temp[0] == 'S')
                ack_cnt++;
            temp[0] = 'T'; // reset so a stale 'S' is never counted twice
        }

        if (ack_cnt == num_servers) {
            const double e_time = now_ms();
            avg_time_diff += (e_time - s_time);
            succ_cnt++;
            send_obj(client_base, (char *)&send_succ, sizeof(send_succ));
        }
        // NOTE(review): when a replica does not ack, no reply is sent and
        // the request is counted nowhere; the client is presumably left
        // waiting. Decide which status should be returned in that case.
    }

    // Average only over requests that were actually replicated; avoid 0/0.
    if (succ_cnt > 0)
        avg_time_diff /= (double)succ_cnt;

    std::cout << "Errored requests: " << err_cnt << std::endl;
    std::cout << "Successful requests: " << succ_cnt << std::endl;
    std::cout << "Avg. replication time for storage server with 1 replica : "
              << avg_time_diff << std::endl;

    // Tell the replicas to shut down, then release everything.
    for (resource_base *server : server_base)
        sync_remote_qp(server, "X", temp, 1);
    //for(int i=0; i<1000; i++) sync_remote_qp(client_base, "T", tp, 1);
    for (resource_base *server : server_base)
        cleanup(server);
    cleanup(client_base);
    free(temp);
    return 0;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/nonoffloadedclient.cpp
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <unistd.h>
#include <sys/time.h>
#include <string.h>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "transport-helper.hpp"
#include "metadata.hpp"
const
short
int
MODE
=
2
;
char
*
SERVER_HOST
=
"192.168.200.20"
;
const
int
SERVER_PORT
=
8888
;
const
short
int
dev_num
=
0
;
int
main
(
int
argc
,
char
*
argv
[])
{
char
*
temp
=
(
char
*
)
malloc
(
128
);
struct
resource_base
*
base
;
struct
timeval
t_time
;
double
start_ts
,
end_ts
;
double
avg_time
;
base
=
(
struct
resource_base
*
)
malloc
(
sizeof
(
struct
resource_base
));
init_resources
(
base
);
base
->
mode
=
MODE
;
base
->
ib_port
=
IB_PORT
;
base
->
gid_idx
=
GID_IDX
;
base
->
port
=
SERVER_PORT
;
open_dev
(
base
,
dev_num
);
allocate_pd
(
base
);
register_mr
(
base
);
init_cq
(
base
);
init_qp
(
base
);
struct
ibv_port_attr
port_attr
;
if
(
ibv_query_port
(
base
->
ctx
,
base
->
ib_port
,
&
port_attr
))
D
(
err_msg
(
"ibv_query_port"
,
true
,
base
));
if
(
port_attr
.
state
!=
IBV_PORT_ACTIVE
)
D
(
err_msg
(
"IB PORT INACTIVE"
,
true
,
base
));
base
->
port_attr
=
&
port_attr
;
set_mtu
(
base
->
port_attr
,
&
(
base
->
mtu
));
union
ibv_gid
my_gid
;
if
(
ibv_query_gid
(
base
->
ctx
,
base
->
ib_port
,
base
->
gid_idx
,
&
my_gid
))
D
(
err_msg
(
"IBV_QUERY_GID"
,
true
,
base
));
memcpy
(
base
->
local_conn
->
gid
,
&
my_gid
,
16
);
connect_qp
(
base
);
if
(
poll_completion
(
base
))
D
(
err_msg
(
"poll_completion"
,
false
,
base
));
union
object
obj_to_send
;
int
err_cnt
=
0
;
int
succ_cnt
=
0
;
avg_time
=
0
;
enum
Status
ret_status
;
for
(
int
i
=
0
;
i
<
cache_meta_size
;
i
++
)
{
obj_to_send
.
obj
.
key
=
i
;
memcpy
(
base
->
mr_buf_addr
,
obj_to_send
,
sizeof
(
obj_to_send
));
gettimeofday
(
&
t_time
,
NULL
);
start_ts
=
((
double
)(
t_time
.
tv_sec
)
*
(
double
)(
1000.0
)
+
(
double
)
t_time
.
tv_usec
/
(
double
)(
1000
));
send_obj
(
base
,
"R"
,
1
);
read_obj
(
base
,
temp
,
1
);
if
(
temp
==
"R"
)
{
post_send
(
base
,
IBV_WR_RDMA_READ
);
if
(
poll_completion
(
base
))
D
(
err_msg
(
"poll_completion"
,
true
,
base
));
if
(
*
(
enum
Status
*
)(
base
->
mr_buf_addr
)
==
STATUS_OK
)
succ_cnt
++
;
else
err_cnt
++
;
gettimeofday
(
&
t_time
,
NULL
);
end_ts
=
((
double
)(
t_time
.
tv_usec
)
/
(
double
)(
1000
)
+
(
double
)(
t_time
.
tv_sec
)
*
(
double
)(
1000
));
avg_time
+=
(
start_ts
-
end_ts
);
}
}
avg_time
/=
(
double
)
cache_meta_size
;
std
::
cout
<<
"Err count: "
<<
err_cnt
<<
std
::
endl
;
std
::
cout
<<
"Succ count: "
<<
succ_cnt
<<
std
::
endl
;
std
::
cout
<<
"Avg RTT: "
<<
avg_time
<<
" ms"
<<
std
::
endl
;
cleanup
(
base
);
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/nonoffloadedserver.cpp
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <sys/time.h>
#include <unordered_map>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
// Number of replica servers; sizes SERVER_HOST and the per-replica arrays
// in main().
const int num_replicas = 1;

// Replica addresses, one per replica.
char *SERVER_HOST[num_replicas] = {
    "192.168.200.50"
};

// Port used for the client connection.
const int CLIENT_PORT = 8888;
// Port used for the replica connections.
const int SERVER_PORT = 9999;

// Device index passed to open_dev().
const short int dev_num = 0;

// Threshold used by populate_cache_meta(): an entry is marked invalid when
// rand()/RAND_MAX <= err_fraction. With 0, that only happens when rand()
// returns exactly 0.
const double err_fraction = 0;

// Zero-initialised template entries copied into obj_table.
struct object_metadata obj_metas[cache_meta_size];

// Definition of the table declared extern in metadata.hpp.
std::unordered_map<long long int, struct object_metadata> obj_table;
void
populate_cache_meta
(
std
::
unordered_map
<
long
long
,
struct
object_metadata
>&
m
)
{
for
(
int
i
=
0
;
i
<
cache_meta_size
;
i
++
)
{
m
[
i
]
=
obj_metas
[
i
];
m
[
i
].
key
=
i
;
if
((
double
)
rand
()
/
RAND_MAX
<=
err_fraction
)
m
[
i
].
valid
=
false
;
else
m
[
i
].
valid
=
true
;
}
}
/*
 * Primary storage server of the non-offloaded baseline (RDMA client link).
 *
 * 1. Sets up one RDMA connection per replica in SERVER_HOST.
 * 2. Sets up the RDMA connection to the client.
 * 3. For cache_meta_size iterations: waits for a one-byte "R" request,
 *    RDMA-reads the object into the client connection's registered buffer
 *    and checks its key in obj_table. Valid keys are copied to every
 *    replica buffer, the replicas are signalled ("W") and their acks ("S")
 *    collected; the resulting Status is placed in the client buffer and
 *    "R" is sent back so the client can fetch it.
 * 4. Prints the counters and the average replication time, tells the
 *    replicas to stop ("X") and releases all resources.
 *
 * Returns 0 on completion, 1 if a start-up allocation fails.
 */
int main(int argc, char *argv[])
{
    srand(time(NULL));

    // Scratch buffer for all one-byte control exchanges.
    char *temp = (char *)malloc(128);
    if (temp == NULL) {
        perror("malloc");
        return 1;
    }

    // Wall-clock time in milliseconds.
    auto now_ms = []() -> double {
        struct timeval tv;
        gettimeofday(&tv, NULL);
        return ((double)tv.tv_sec * 1000.0 + (double)tv.tv_usec / 1000.0);
    };

    struct resource_base *server_base[num_replicas];
    struct resource_base *client_base;
    struct ibv_port_attr server_port_attr[num_replicas];
    struct ibv_port_attr client_port_attr;
    union ibv_gid server_gid[num_replicas];
    union ibv_gid client_gid;

    // connecting to other server(s)
    for (int i = 0; i < num_replicas; i++) {
        struct resource_base *srv =
            (struct resource_base *)malloc(sizeof(struct resource_base));
        if (srv == NULL) {
            perror("malloc");
            return 1;
        }
        server_base[i] = srv;

        init_resources(srv);
        srv->mode = 1;
        srv->ib_port = 1;
        srv->gid_idx = 1;
        srv->server_name = SERVER_HOST[i];
        srv->port = SERVER_PORT;

        open_dev(srv, dev_num);
        allocate_pd(srv);
        register_mr(srv);
        init_cq(srv);
        init_qp(srv);

        if (ibv_query_port(srv->ctx, srv->ib_port, &server_port_attr[i]))
            D(err_msg("ibv_query_port", true, srv));
        if (server_port_attr[i].state != IBV_PORT_ACTIVE)
            D(err_msg("IB PORT NOT ACTIVE", true, srv));
        srv->port_attr = &server_port_attr[i];
        set_mtu(srv->port_attr, &(srv->mtu));

        if (ibv_query_gid(srv->ctx, srv->ib_port, srv->gid_idx, &server_gid[i]))
            D(err_msg("ibv_query_gid", true, srv));
        memcpy(srv->local_conn->gid, &server_gid[i], 16);

        connect_qp(srv);
        sync_remote_qp(srv, "T", temp, 1);
        if (poll_completion(srv))
            D(err_msg("IBV_WR_SEND", false, srv));
    }

    // primary connects to client
    client_base = (struct resource_base *)malloc(sizeof(struct resource_base));
    if (client_base == NULL) {
        perror("malloc");
        return 1;
    }
    init_resources(client_base);
    client_base->ib_port = IB_PORT;
    client_base->gid_idx = GID_IDX;
    client_base->port = CLIENT_PORT;

    open_dev(client_base, dev_num);
    allocate_pd(client_base);
    register_mr(client_base);
    init_cq(client_base);
    init_qp(client_base);

    if (ibv_query_port(client_base->ctx, client_base->ib_port, &(client_port_attr)))
        D(err_msg("CLIENT PORT ATTR", true, client_base));
    if (client_port_attr.state != IBV_PORT_ACTIVE)
        D(err_msg("CLIENT PORT NOT ACTIVE", true, client_base));
    client_base->port_attr = &(client_port_attr);
    // NOTE(review): unlike the replica setup above, set_mtu() is not called
    // for the client connection -- confirm whether that is intentional.

    if (ibv_query_gid(client_base->ctx, client_base->ib_port, client_base->gid_idx,
                      &(client_gid)))
        D(err_msg("CLIENT QUERY GID", true, client_base));
    memcpy(client_base->local_conn->gid, &(client_gid), 16);

    connect_qp(client_base);
    if (poll_completion(client_base))
        D(err_msg("client poll completion", true, client_base));

    int err_cnt = 0;
    int succ_cnt = 0;
    populate_cache_meta(obj_table);

    enum Status send_succ = STATUS_OK;
    enum Status send_err = STATUS_WRONG_VERSION;
    double avg_time = 0;

    for (int i = 0; i < cache_meta_size; i++) {
        // The timer starts before waiting for the request, so the measured
        // interval includes the wait for the client.
        const double start_ts = now_ms();

        read_obj(client_base, temp, 1);
        if (temp[0] != 'R')
            continue;

        // Fetch the object from the client into our registered buffer.
        post_send(client_base, IBV_WR_RDMA_READ);
        if (poll_completion(client_base))
            D(err_msg("Client obj read poll completion", true, client_base));

        // Look the key up without inserting: operator[] would add a new
        // entry for every unknown key the client sends. Unknown keys are
        // treated as invalid, which is also what a default-inserted entry
        // would yield.
        const long long key = ((union object *)client_base->mr_buf_addr)->obj.key;
        auto hit = obj_table.find(key);
        const bool key_valid = (hit != obj_table.end()) && hit->second.valid;

        if (!key_valid) {
            memcpy((void *)client_base->mr_buf_addr, (void *)(&send_err), sizeof(send_err));
            send_obj(client_base, "R", 1);
            err_cnt++;
            continue;
        }

        // NOTE(review): assumes every replica buffer is at least
        // client_base->mr_size bytes -- confirm against register_mr().
        for (struct resource_base *server : server_base)
            memcpy((void *)server->mr_buf_addr, (void *)(client_base->mr_buf_addr),
                   client_base->mr_size);

        // Tell every replica a write is pending, then collect the acks.
        for (struct resource_base *server : server_base)
            sync_remote_qp(server, "W", temp, 1);
        int ack_cnt = 0;
        for (resource_base *server : server_base) {
            sync_remote_qp(server, "T", temp, 1); // ack
            if (temp[0] == 'S')
                ack_cnt++;
            temp[0] = 'T'; // reset so a stale 'S' is never counted twice
        }

        if (ack_cnt == num_replicas) {
            const double end_ts = now_ms();
            avg_time += (end_ts - start_ts);
            succ_cnt++;
            memcpy((void *)client_base->mr_buf_addr, (void *)(&send_succ), sizeof(send_succ));
            send_obj(client_base, "R", 1);
        }
        // NOTE(review): when a replica does not ack, no reply is sent and
        // the request is counted nowhere; the client is presumably left
        // waiting. Decide which status should be returned in that case.
    }

    // Average only over requests that were actually replicated; avoid 0/0.
    if (succ_cnt > 0)
        avg_time /= (double)succ_cnt;

    std::cout << "Errored requests: " << err_cnt << std::endl;
    std::cout << "Successful requests: " << succ_cnt << std::endl;
    std::cout << "Avg. replication time for storage server with 1 replica : "
              << avg_time << std::endl;

    // Tell the replicas to shut down, then release everything.
    for (resource_base *server : server_base)
        sync_remote_qp(server, "X", temp, 1);
    for (resource_base *server : server_base)
        cleanup(server);
    cleanup(client_base);
    free(temp);
    return 0;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/rdma_helper.cpp
0 → 100644
View file @
d2d47b86
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
#include <iostream>
#include <string>
#include <infiniband/verbs.h>
#include "rdma_helper.hpp"
#include "rdma_states.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
#define PLACEHOLDER "hello\0"
/*
 * Print `msg` followed by the current errno description (via perror) and,
 * when `to_exit` is true, release the resources held by `base` and
 * terminate the process.
 *
 * msg     - text prefixed to the perror output
 * to_exit - true: clean up and exit; false: just report and return
 * base    - connection state handed to cleanup(); may be NULL
 *
 * The fatal path exits with EXIT_FAILURE so that shells and scripts can
 * tell a failed run from a successful one (it previously exited with 0).
 */
void err_msg(std::string msg, bool to_exit, struct resource_base *base) {
    perror(msg.c_str());
    if (to_exit) {
        if (base != NULL) {
            cleanup(base);
        }
        exit(EXIT_FAILURE);
    }
}
/*
 * Release every verbs object, the registered buffer and the sockets that
 * `base` currently owns.
 *
 * Each handle is cleared after it is released, so calling cleanup() twice
 * on the same `base` is harmless. This matters because sock_connect() calls
 * cleanup() and then err_msg(..., true, ...), which calls cleanup() again.
 *
 * The QP and MR are released before the CQ and PD they were created from.
 *
 * NOTE(review): local_conn, remote_conn, dev_attr, port_attr and `base`
 * itself are not released here. port_attr in particular is re-pointed at
 * stack storage by the main() functions in this commit, so it must not be
 * passed to free().
 */
void cleanup(struct resource_base *base) {
    if (base == NULL) {
        return;
    }
    if (base->qp) {
        ibv_destroy_qp(base->qp);
        base->qp = NULL;
    }
    if (base->mr) {
        ibv_dereg_mr(base->mr);
        base->mr = NULL;
    }
    if (base->mr_buf_addr) {
        free(base->mr_buf_addr);
        base->mr_buf_addr = NULL;
    }
    if (base->cq) {
        ibv_destroy_cq(base->cq);
        base->cq = NULL;
    }
    if (base->pd) {
        ibv_dealloc_pd(base->pd);
        base->pd = NULL;
    }
    if (base->ctx) {
        ibv_close_device(base->ctx);
        base->ctx = NULL;
    }
    if (base->local_sock_fd >= 0) {
        close(base->local_sock_fd);
        base->local_sock_fd = -1;
    }
    if (base->conn_fd >= 0) {
        close(base->conn_fd);
        base->conn_fd = -1;
    }
}
/*
 * Put `base` into a known state: no verbs objects, no sockets, default
 * tuning values, and freshly allocated bookkeeping structs.
 *
 * All handles and descriptors are reset before anything is allocated, so
 * the fatal path (err_msg -> cleanup) never sees uninitialised fields.
 * The bookkeeping structs are zero-filled; query_port() reads
 * port_attr->state, which would otherwise be indeterminate.
 */
void init_resources(struct resource_base *base) {
    base->ctx = NULL;
    base->pd = NULL;
    base->cq = NULL;
    base->qp = NULL;
    base->mr = NULL;
    base->mr_buf_addr = NULL;
    base->mr_size = ((1 << 20) + 200); //128;
    base->mtu = IBV_MTU_512;
    base->min_rnr_timer = 12;
    base->timeout = 12;
    base->retry_cnt = 4;
    base->ib_port = 1;
    base->gid_idx = 1;
    base->local_sock_fd = -1;
    base->conn_fd = -1;
    base->mode = -1;
    base->server_name = NULL;
    base->port = -1;

    base->local_conn = (struct conn_data *)calloc(1, sizeof(struct conn_data));
    base->remote_conn = (struct conn_data *)calloc(1, sizeof(struct conn_data));
    base->dev_attr = (struct ibv_device_attr *)calloc(1, sizeof(struct ibv_device_attr));
    base->port_attr = (struct ibv_port_attr *)calloc(1, sizeof(struct ibv_port_attr));

    if (base->local_conn == NULL || base->remote_conn == NULL ||
        base->dev_attr == NULL || base->port_attr == NULL) {
        err_msg("init_resources;calloc", true, base);
    }
}
/*
 * Report whether the port state cached in base->port_attr is
 * IBV_PORT_ACTIVE. Aborts through err_msg() when no device context is open.
 *
 * This function only inspects base->port_attr; it does not call
 * ibv_query_port(). The original author's note said it did not seem to work
 * when called as a function, and that main() calls ibv_query_port directly.
 */
bool query_port(struct resource_base *base) {
    if (base->ctx == NULL) {
        err_msg("query_port;No device context", true, base);
    }
    return base->port_attr->state == IBV_PORT_ACTIVE;
}
/*
 * Establish the TCP side channel used to exchange connection data.
 *
 * When base->server_name is NULL this side binds to base->port on all
 * interfaces, listens, and accepts one peer (local_sock_fd = listening
 * socket, conn_fd = accepted socket). Otherwise it connects to
 * server_name:port and stores the socket in conn_fd.
 *
 * Returns 0 on success and -1 when the socket cannot be created. Every
 * other failure is fatal and goes through err_msg(..., true, ...).
 */
int sock_connect(struct resource_base *base) {
    if (base->port < 0) {
        /* err_msg() performs the cleanup itself; doing it here as well
         * released every resource twice. */
        err_msg("tcp_connect", true, base);
    }

    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(base->port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    int sfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sfd < 0) {
        err_msg("sock_connect; sfd:socket", false, base);
        return -1;
    }

    if (base->server_name == NULL) {
        /* Passive side: wait for exactly one peer. */
        if (bind(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr))) {
            close(sfd);
            err_msg("sock_connect;bind", true, base);
        }
        if (listen(sfd, 1) < 0) {
            close(sfd);
            err_msg("sock_connect;listen", true, base);
        }
        int cfd = accept(sfd, NULL, NULL);
        if (cfd < 0) {
            close(sfd);
            err_msg("sock_connect;accept", true, base);
        }
        base->conn_fd = cfd;
        base->local_sock_fd = sfd;
        return 0;
    }

    /* Active side: server_name must be a dotted-quad IPv4 address. */
    if (inet_aton(base->server_name, &host_addr.sin_addr) == 0) {
        close(sfd);
        errno = EINVAL; /* inet_aton does not set errno itself */
        err_msg("sock_connect;inet_aton", true, base);
    }
    if (connect(sfd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        close(sfd);
        err_msg("sock_connect;connect", true, base);
    }
    base->conn_fd = sfd;
    return 0;
}
/*
 * Lock-step exchange over the TCP side channel: send `size` bytes from
 * `local_data`, then read `size` bytes from the peer into `remote_data`.
 *
 * Returns the number of bytes actually received. This is less than `size`
 * if the peer closed the connection or a read failed. A short or failed
 * write is fatal.
 *
 * Each read resumes where the previous one stopped and asks only for the
 * bytes still missing. The old loop always read `size` bytes to the start
 * of the buffer, so a partial read was overwritten and the returned total
 * could exceed `size`.
 */
int sync_remote_qp(struct resource_base *base, char *local_data, char *remote_data, int size) {
    int operation_bytes = write(base->conn_fd, local_data, size);
    if (operation_bytes < size) {
        err_msg("sync_remote_qp;write", true, base);
    }

    int total_read_bytes = 0;
    while (total_read_bytes < size) {
        operation_bytes = read(base->conn_fd,
                               remote_data + total_read_bytes,
                               size - total_read_bytes);
        if (operation_bytes <= 0) {
            break; /* 0: peer closed; <0: read error */
        }
        total_read_bytes += operation_bytes;
    }
    return total_read_bytes;
}
/*
 * Open the RDMA device at index `dev_num` of the verbs device list and
 * store its context in base->ctx.
 *
 * Returns 0 on success. A missing device list, an out-of-range index or a
 * failed open is fatal (err_msg with to_exit = true).
 */
int open_dev(struct resource_base *base, short int dev_num) {
    int num_devs = 0;
    struct ibv_device **dev_list = ibv_get_device_list(&num_devs);
    if (dev_list == NULL) {
        err_msg("open_dev;ibv_get_device_list", true, base);
    }

    /* The list may be empty or shorter than dev_num; indexing past it
     * would hand a bogus pointer to ibv_open_device(). */
    if (dev_num < 0 || dev_num >= num_devs) {
        ibv_free_device_list(dev_list);
        errno = ENODEV;
        err_msg("open_dev;device index out of range", true, base);
    }

    struct ibv_context *dev_ctx = ibv_open_device(dev_list[dev_num]);
    if (dev_ctx == NULL) {
        int saved_errno = errno; /* keep the open failure for perror */
        ibv_free_device_list(dev_list);
        errno = saved_errno;
        err_msg("open_dev;ibv_open_device", true, base);
    }

    base->ctx = dev_ctx;
    //std::cout<<ibv_get_device_name(dev_list[0])<<std::endl;
    ibv_free_device_list(dev_list);
    return 0;
}
/*
 * Allocate a protection domain on the opened device and record it in
 * base->pd. Both a missing device context and a failed allocation are
 * fatal. Always returns 0 when it returns.
 */
int allocate_pd(struct resource_base *base) {
    if (!base->ctx) {
        err_msg("allocate_pd;No device context", true, base);
    }

    struct ibv_pd *domain = ibv_alloc_pd(base->ctx);
    if (!domain) {
        err_msg("allocate_pd;ibv_alloc_pd", true, base);
    }

    base->pd = domain;
    return 0;
}
/*
 * Allocate a buffer of base->mr_size bytes, seed it with PLACEHOLDER and
 * register it as a memory region with local-write, remote-read and
 * remote-write access. On success base->mr_buf_addr and base->mr are set
 * and 0 is returned; every failure is fatal.
 */
int register_mr(struct resource_base *base) {
    if (base->pd == NULL) {
        err_msg("register_mr;No PD allocated", true, base);
    }

    /* Zero-filled so that no stale heap contents become remotely readable. */
    char *buf = (char *)calloc(1, base->mr_size);
    if (buf == NULL) {
        err_msg("register_mr;calloc", true, base);
    }
    if (base->mr_size > strlen(PLACEHOLDER)) {
        strcpy(buf, PLACEHOLDER);
    }

    /* Hand the buffer to `base` before registering it, so that the cleanup
     * run by a fatal err_msg() frees it. */
    base->mr_buf_addr = buf;

    int flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE;
    struct ibv_mr *mr = ibv_reg_mr(base->pd, (void *)buf, base->mr_size, flags);
    if (mr == NULL) {
        err_msg("register_mr;ibv_reg_mr", true, base);
    }

    base->mr = mr;
    return 0;
}
/*
 * Query the device attributes into base->dev_attr and create a completion
 * queue with room for 3 entries, with no completion channel, stored in
 * base->cq. All failures are fatal. Always returns 0 when it returns.
 */
int init_cq(struct resource_base *base) {
    if (!base->ctx) {
        err_msg("create_cq;No device context", true, base);
    }

    if (ibv_query_device(base->ctx, base->dev_attr) != 0) {
        err_msg("create_cq;ibv_query_device", true, base);
    }

    base->cq = ibv_create_cq(base->ctx, 3, NULL, NULL, 0);
    if (!base->cq) {
        err_msg("create_cq;ibv_create_cq", true, base);
    }
    return 0;
}
int
init_qp
(
struct
resource_base
*
base
)
{
struct
ibv_qp_init_attr
qp_init_attr
;
memset
(
&
qp_init_attr
,
0
,
sizeof
(
qp_init_attr
));
qp_init_attr
.
sq_sig_all
=
1
;
qp_init_attr
.
send_cq
=
base
->
cq
;
qp_init_attr
.
recv_cq
=
base
->
cq
;
qp_init_attr
.
qp_type
=
IBV_QPT_RC
;
qp_init_attr
.
cap
=
{
.
max_send_wr
=
3
,
.
max_recv_wr
=
3
,
.
max_send_sge
=
3
,
.
max_recv_sge
=
3
};
base
->
qp
=
ibv_create_qp
(
base
->
pd
,
&
qp_init_attr
);
if
(
!
base
->
qp
)
D
(
err_msg
(
"ibv_create_qp"
,
true
,
base
));
//std::cout<<"QP num: "<<base->qp->qp_num<<std::endl;
return
0
;
}
void
post_send
(
struct
resource_base
*
base
,
ibv_wr_opcode
opcode
)
{
struct
ibv_send_wr
sr
;
struct
ibv_sge
sge
;
struct
ibv_send_wr
*
bad_wr
;
memset
(
&
sge
,
0
,
sizeof
(
sge
));
sge
.
addr
=
(
uintptr_t
)
base
->
mr_buf_addr
;
sge
.
length
=
base
->
mr_size
;
sge
.
lkey
=
base
->
mr
->
lkey
;
memset
(
&
sr
,
0
,
sizeof
(
sr
));
sr
.
next
=
NULL
;
sr
.
wr_id
=
0
;
sr
.
sg_list
=
&
sge
;
sr
.
num_sge
=
1
;
sr
.
opcode
=
opcode
;
sr
.
send_flags
=
IBV_SEND_SIGNALED
;
if
(
opcode
!=
IBV_WR_SEND
)
{
sr
.
wr
.
rdma
.
remote_addr
=
base
->
remote_conn
->
addr
;
sr
.
wr
.
rdma
.
rkey
=
base
->
remote_conn
->
rkey
;
}
if
(
ibv_post_send
(
base
->
qp
,
&
sr
,
&
bad_wr
))
D
(
err_msg
(
"ibv_post_send"
,
true
,
base
));
return
;
}
void
post_receive
(
struct
resource_base
*
base
)
{
struct
ibv_recv_wr
rr
;
struct
ibv_sge
sge
;
struct
ibv_recv_wr
*
bad_wr
;
memset
(
&
sge
,
0
,
sizeof
(
sge
));
sge
.
addr
=
(
uintptr_t
)
base
->
mr_buf_addr
;
sge
.
length
=
base
->
mr_size
;
sge
.
lkey
=
base
->
mr
->
lkey
;
memset
(
&
rr
,
0
,
sizeof
(
rr
));
rr
.
next
=
NULL
;
rr
.
wr_id
=
0
;
rr
.
sg_list
=
&
sge
;
rr
.
num_sge
=
1
;
if
(
ibv_post_recv
(
base
->
qp
,
&
rr
,
&
bad_wr
))
D
(
err_msg
(
"ibv_post_recv"
,
true
,
base
));
return
;
}
/*
 * Busy-poll base->cq for one work completion, for at most
 * MAX_POLL_CQ_TIMEOUT milliseconds.
 *
 * Returns 0 when a completion with status IBV_WC_SUCCESS was reaped, and 1
 * on timeout, on an ibv_poll_cq() error (also reported through err_msg),
 * or when the completion carries a failure status.
 *
 * Two defects are fixed here:
 *  - the start time was built from tv_usec * 1000 instead of tv_sec * 1000,
 *    so the elapsed time always exceeded the timeout and the loop gave up
 *    after a single poll;
 *  - a negative ibv_poll_cq() result fell through and returned 0 (success).
 */
int poll_completion(struct resource_base *base) {
    struct ibv_wc wc;
    struct timeval cur_time;
    unsigned long start_time_msec;
    unsigned long cur_time_msec;
    int poll_result = -1;

    gettimeofday(&cur_time, NULL);
    start_time_msec = (cur_time.tv_sec * 1000) + (cur_time.tv_usec / 1000);

    do {
        poll_result = ibv_poll_cq(base->cq, 1, &wc);
        gettimeofday(&cur_time, NULL);
        cur_time_msec = (cur_time.tv_sec * 1000) + (cur_time.tv_usec / 1000);
    } while (poll_result == 0 &&
             ((cur_time_msec - start_time_msec) < MAX_POLL_CQ_TIMEOUT));

    if (poll_result < 0) {
        err_msg("ibv_poll_cq", false, base);
        return 1;
    }
    if (poll_result == 0) {
        return 1; /* CQ still empty when the timeout expired */
    }
    if (wc.status != IBV_WC_SUCCESS) {
        return 1; /* bad completion */
    }
    return 0;
}
int
connect_qp
(
struct
resource_base
*
base
)
{
sock_connect
(
base
);
//std::cout<<sock_connect(base)<<std::endl;
/* ibv_query_gd seems to not be working when called in wrapper a function,
/* Current implmentation uses this ibv_query_gid call directly in main
*/
//union ibv_gid my_gid;
// if(ibv_query_gid(base->ctx, base->ib_port, base->gid_idx, &my_gid))
// D(err_msg("ibv_query_gid", false, base));
//setup exchange data
base
->
local_conn
->
addr
=
htonll
((
uintptr_t
)
base
->
mr_buf_addr
);
base
->
local_conn
->
rkey
=
htonl
(
base
->
mr
->
rkey
);
base
->
local_conn
->
qp_num
=
htonl
(
base
->
qp
->
qp_num
);
base
->
local_conn
->
lid
=
htons
(
base
->
port_attr
->
lid
);
//memcpy(&base->local_conn->gid, &my_gid, 16);
//initiate transfer
sync_remote_qp
(
base
,
(
char
*
)
base
->
local_conn
,
(
char
*
)
base
->
remote_conn
,
sizeof
(
conn_data
));
// std::cout<<"local qp: "<<base->local_conn->qp_num<<std::endl;
// std::cout<<"remote qp: "<<base->remote_conn->qp_num<<std::endl;
base
->
remote_conn
->
addr
=
ntohll
(
base
->
remote_conn
->
addr
);
base
->
remote_conn
->
rkey
=
ntohl
(
base
->
remote_conn
->
rkey
);
base
->
remote_conn
->
qp_num
=
ntohl
(
base
->
remote_conn
->
qp_num
);
base
->
remote_conn
->
lid
=
ntohs
(
base
->
remote_conn
->
lid
);
//change states
modify_reset_to_init
(
base
);
modify_init_to_rtr
(
base
);
if
(
base
->
server_name
!=
NULL
)
{
post_receive
(
base
);
}
modify_rtr_to_rts
(
base
);
if
(
base
->
server_name
==
NULL
)
{
post_send
(
base
,
IBV_WR_SEND
);
}
return
0
;
}
Smit_MTP_RamCloud_Replication_Offload/rdma_helper.hpp
0 → 100644
View file @
d2d47b86
#include <string>
#include <byteswap.h>
#include <infiniband/verbs.h>
#ifndef __RDMA_HELPER__
#define __RDMA_HELPER__
/*
 * 64-bit host <-> network byte-order conversion.
 *
 * The test uses the __-prefixed glibc macros. __LITTLE_ENDIAN and
 * __BIG_ENDIAN are defined together with __BYTE_ORDER, whereas the
 * unprefixed LITTLE_ENDIAN / BIG_ENDIAN exist only under BSD/default
 * feature macros and silently evaluate to 0 in #if when absent.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
/* Little-endian host: swap all eight bytes. */
static inline uint64_t htonll(uint64_t x) { return bswap_64(x); }
static inline uint64_t ntohll(uint64_t x) { return bswap_64(x); }
#elif __BYTE_ORDER == __BIG_ENDIAN
/* Big-endian host: already in network order. */
static inline uint64_t htonll(uint64_t x) { return x; }
static inline uint64_t ntohll(uint64_t x) { return x; }
#else
#error __BYTE_ORDER is neither __LITTLE_ENDIAN nor __BIG_ENDIAN
#endif
#define D(x) do{x;}while(0)
const
int
MAX_POLL_CQ_TIMEOUT
=
2000
;
const
int
IB_PORT
=
1
;
const
int
GID_IDX
=
1
;
/*
 * Connection parameters that the two peers swap over TCP in connect_qp().
 * The struct is sent as raw bytes (sizeof(conn_data)); addr, rkey, qp_num
 * and lid are converted to network byte order before sending and back
 * after receiving, while gid is copied unchanged.
 * NOTE(review): the raw-byte exchange presumably requires both peers to use
 * the same struct layout and padding - confirm if peers are built
 * differently.
 */
struct conn_data {
    uint64_t addr;    // address of the registered buffer (mr_buf_addr)
    uint32_t rkey;    // remote key of the memory region
    uint32_t qp_num;  // queue pair number
    uint16_t lid;     // port LID
    uint8_t gid[16];  // port GID, filled in by the caller
};
/*
 * Everything that belongs to one RDMA connection: the verbs objects, the
 * registered buffer, the connection data of both peers, tuning values, and
 * the TCP side channel. Initialised by init_resources() and released by
 * cleanup().
 */
struct resource_base {
    struct ibv_context *ctx;            // device context (open_dev)
    struct ibv_device_attr *dev_attr;   // filled by ibv_query_device in init_cq
    struct ibv_port_attr *port_attr;    // port attributes; connect_qp reads lid
    struct ibv_pd *pd;                  // protection domain (allocate_pd)
    struct ibv_cq *cq;                  // completion queue shared by send and recv
    struct ibv_qp *qp;                  // queue pair (init_qp)
    struct ibv_mr *mr;                  // memory region over mr_buf_addr
    struct conn_data *remote_conn;      // peer's parameters, host order after connect_qp
    struct conn_data *local_conn;       // our parameters, network order after connect_qp
    char *mr_buf_addr;                  // registered buffer of mr_size bytes
    uint32_t mr_size;                   // size of the registered buffer in bytes
    enum ibv_mtu mtu;                   // path MTU used for the RTR transition
    short int min_rnr_timer;            // min_rnr_timer used for the RTR transition
    short int timeout;                  // timeout used for the RTS transition
    short int retry_cnt;                // retry_cnt used for the RTS transition
    short int ib_port;                  // device port number
    short int gid_idx;                  // GID table index; >= 0 enables the GRH in RTR
    // NOTE(review): descriptors are stored in short int; values above
    // SHRT_MAX would be truncated.
    short int local_sock_fd;            // listening socket (passive side), -1 if unused
    short int conn_fd;                  // connected TCP socket, -1 if unused
    short int mode;                     // set by callers; not read by the helpers in this commit's visible code
    char *server_name;                  // peer IPv4 address to connect to; NULL means listen
    int port;                           // TCP port of the side channel
};
/* perror(msg); when to_exit is true, also cleanup(base) and exit. */
void err_msg(std::string msg, bool to_exit, struct resource_base *base);
/* Destroy the verbs objects, free the registered buffer, close the sockets. */
void cleanup(struct resource_base *base);
/* Reset all handles, set default tuning values, allocate bookkeeping structs. */
void init_resources(struct resource_base *base);
/* True when base->port_attr->state is IBV_PORT_ACTIVE. */
bool query_port(struct resource_base *base);
/* Open the TCP side channel: listen when server_name is NULL, else connect. */
int sock_connect(struct resource_base *base);
/* Send `size` bytes, then receive `size` bytes over conn_fd; returns bytes read. */
int sync_remote_qp(struct resource_base *base, char *local_data, char *remote_data, int size);
/* Open device number dev_num of the verbs device list into base->ctx. */
int open_dev(struct resource_base *base, short int dev_num);
/* Allocate the protection domain. */
int allocate_pd(struct resource_base *base);
/* Allocate and register the mr_size-byte buffer. */
int register_mr(struct resource_base *base);
/* Query the device attributes and create the completion queue. */
int init_cq(struct resource_base *base);
/* Create the RC queue pair. */
int init_qp(struct resource_base *base);
/* Post one send-queue work request with the given opcode. */
void post_send(struct resource_base *base, ibv_wr_opcode opcode);
/* Post one receive work request over the registered buffer. */
void post_receive(struct resource_base *base);
/* Poll the CQ for one completion; returns 0 on success, 1 otherwise. */
int poll_completion(struct resource_base *base);
/* Exchange connection data with the peer and drive the QP to RTS. */
int connect_qp(struct resource_base *base);
#endif
Smit_MTP_RamCloud_Replication_Offload/rdma_states.cpp
0 → 100644
View file @
d2d47b86
#include <infiniband/verbs.h>
#include "rdma_helper.hpp"
#include "rdma_states.hpp"
int
reset_state
(
struct
resource_base
*
base
)
{
int
flags
=
0
;
//REREGISTER MR : REREGISTER THE ACCESS TYPE
flags
=
IBV_ACCESS_REMOTE_WRITE
|
IBV_ACCESS_REMOTE_READ
|
IBV_ACCESS_LOCAL_WRITE
;
if
(
ibv_rereg_mr
(
base
->
mr
,
IBV_REREG_MR_CHANGE_ACCESS
,
base
->
pd
,
base
->
mr_buf_addr
,
base
->
mr_size
,
flags
))
D
(
err_msg
(
"reset_state"
,
true
,
base
));
//RESET QP : CHANGE STATE TO RESET, RESET CAP
flags
=
0
;
struct
ibv_qp_attr
reset_attr
;
memset
(
&
reset_attr
,
0
,
sizeof
(
reset_attr
));
reset_attr
.
qp_state
=
IBV_QPS_RESET
;
reset_attr
.
cap
=
{
.
max_send_wr
=
1
,
.
max_recv_wr
=
1
,
.
max_send_sge
=
1
,
.
max_recv_sge
=
1
};
flags
=
IBV_QP_STATE
|
IBV_QP_CAP
;
if
(
ibv_modify_qp
(
base
->
qp
,
&
reset_attr
,
flags
))
D
(
err_msg
(
"reset_state"
,
true
,
base
));
return
0
;
}
int
modify_reset_to_init
(
struct
resource_base
*
base
)
{
struct
ibv_qp_attr
init_attr
;
memset
(
&
init_attr
,
0
,
sizeof
(
init_attr
));
init_attr
.
qp_state
=
IBV_QPS_INIT
;
init_attr
.
pkey_index
=
0
;
init_attr
.
port_num
=
base
->
ib_port
;
init_attr
.
qp_access_flags
=
IBV_ACCESS_REMOTE_WRITE
|
IBV_ACCESS_REMOTE_READ
|
IBV_ACCESS_LOCAL_WRITE
;
int
flags
=
IBV_QP_STATE
|
IBV_QP_PKEY_INDEX
|
IBV_QP_PORT
|
IBV_QP_ACCESS_FLAGS
;
if
(
ibv_modify_qp
(
base
->
qp
,
&
init_attr
,
flags
))
D
(
err_msg
(
"modify_reset_to_init"
,
true
,
base
));
return
0
;
}
int
modify_init_to_rtr
(
struct
resource_base
*
base
)
{
struct
ibv_qp_attr
rtr_attr
;
memset
(
&
rtr_attr
,
0
,
sizeof
(
rtr_attr
));
rtr_attr
.
qp_state
=
IBV_QPS_RTR
;
rtr_attr
.
path_mtu
=
base
->
mtu
;
rtr_attr
.
dest_qp_num
=
base
->
remote_conn
->
qp_num
;
rtr_attr
.
rq_psn
=
0
;
rtr_attr
.
max_dest_rd_atomic
=
1
;
rtr_attr
.
min_rnr_timer
=
base
->
min_rnr_timer
;
rtr_attr
.
ah_attr
.
is_global
=
0
;
rtr_attr
.
ah_attr
.
dlid
=
base
->
remote_conn
->
lid
;
rtr_attr
.
ah_attr
.
sl
=
0
;
rtr_attr
.
ah_attr
.
src_path_bits
=
0
;
rtr_attr
.
ah_attr
.
port_num
=
base
->
ib_port
;
if
(
base
->
gid_idx
>=
0
)
{
rtr_attr
.
ah_attr
.
is_global
=
1
;
rtr_attr
.
ah_attr
.
port_num
=
base
->
ib_port
;
memcpy
(
&
rtr_attr
.
ah_attr
.
grh
.
dgid
,
base
->
remote_conn
->
gid
,
16
);
rtr_attr
.
ah_attr
.
grh
.
flow_label
=
0
;
rtr_attr
.
ah_attr
.
grh
.
hop_limit
=
1
;
rtr_attr
.
ah_attr
.
grh
.
sgid_index
=
base
->
gid_idx
;
rtr_attr
.
ah_attr
.
grh
.
traffic_class
=
0
;
}
int
flags
=
IBV_QP_STATE
|
IBV_QP_AV
|
IBV_QP_PATH_MTU
|
IBV_QP_DEST_QPN
|
IBV_QP_RQ_PSN
|
IBV_QP_MAX_DEST_RD_ATOMIC
|
IBV_QP_MIN_RNR_TIMER
;
if
(
ibv_modify_qp
(
base
->
qp
,
&
rtr_attr
,
flags
))
D
(
err_msg
(
"modify_init_to_rtr"
,
true
,
base
));
return
0
;
}
int
modify_rtr_to_rts
(
struct
resource_base
*
base
)
{
struct
ibv_qp_attr
rts_attr
;
memset
(
&
rts_attr
,
0
,
sizeof
(
rts_attr
));
rts_attr
.
qp_state
=
IBV_QPS_RTS
;
rts_attr
.
timeout
=
base
->
timeout
;
rts_attr
.
retry_cnt
=
base
->
retry_cnt
;
rts_attr
.
rnr_retry
=
0
;
rts_attr
.
sq_psn
=
0
;
rts_attr
.
max_rd_atomic
=
1
;
int
flags
=
IBV_QP_STATE
|
IBV_QP_TIMEOUT
|
IBV_QP_RETRY_CNT
|
IBV_QP_RNR_RETRY
|
IBV_QP_SQ_PSN
|
IBV_QP_MAX_QP_RD_ATOMIC
;
if
(
ibv_modify_qp
(
base
->
qp
,
&
rts_attr
,
flags
))
D
(
err_msg
(
"modify_rtr_to_rts"
,
true
,
base
));
return
0
;
}
Smit_MTP_RamCloud_Replication_Offload/rdma_states.hpp
0 → 100644
View file @
d2d47b86
#include <infiniband/verbs.h>
#include "rdma_helper.hpp"
#ifndef __RDMA_STATES__
#define __RDMA_STATES__
int
reset_state
(
struct
resource_base
*
base
);
int
modify_reset_to_init
(
struct
resource_base
*
base
);
int
modify_init_to_rtr
(
struct
resource_base
*
base
);
int
modify_rtr_to_rts
(
struct
resource_base
*
base
);
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/rep_offload_server.cpp
0 → 100644
View file @
d2d47b86
Smit_MTP_RamCloud_Replication_Offload/server.cpp
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <unistd.h>
#include <sys/time.h>
#include <string.h>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "metadata.hpp"
const
short
int
MODE
=
2
;
char
*
SERVER_HOST
=
"192.168.200.20"
;
char
*
SERVER_NIC
=
"192.168.200.21"
;
const
int
NIC_PORT
=
8080
;
const
short
int
dev_num
=
0
;
/*
 * Stand-in for real log handling: counts to 100 and does nothing else.
 */
void dummy_loghandler() {
    int spins = 0;
    while (spins < 100) {
        ++spins;
    }
}
int
main
(
int
argc
,
char
*
argv
[])
{
char
*
temp
=
(
char
*
)
malloc
(
128
);
struct
resource_base
*
base
;
base
=
(
struct
resource_base
*
)
malloc
(
sizeof
(
struct
resource_base
));
init_resources
(
base
);
// base->server_name = SERVER_NAME;
base
->
mode
=
MODE
;
base
->
ib_port
=
IB_PORT
;
base
->
gid_idx
=
GID_IDX
;
base
->
port
=
NIC_PORT
;
open_dev
(
base
,
dev_num
);
allocate_pd
(
base
);
register_mr
(
base
);
init_cq
(
base
);
init_qp
(
base
);
struct
ibv_port_attr
port_attr
;
if
(
ibv_query_port
(
base
->
ctx
,
base
->
ib_port
,
&
port_attr
))
D
(
err_msg
(
"ibv_query_port"
,
true
,
base
));
if
(
port_attr
.
state
!=
IBV_PORT_ACTIVE
)
D
(
err_msg
(
"IB PORT NOT ACTIVE"
,
true
,
base
));
base
->
port_attr
=
&
port_attr
;
union
ibv_gid
my_gid
;
if
(
ibv_query_gid
(
base
->
ctx
,
base
->
ib_port
,
base
->
gid_idx
,
&
my_gid
))
D
(
err_msg
(
"ibv_query_gid"
,
true
,
base
));
memcpy
(
base
->
local_conn
->
gid
,
&
my_gid
,
16
);
connect_qp
(
base
);
strcpy
(
base
->
mr_buf_addr
,
"Hi from server"
);
sync_remote_qp
(
base
,
"R"
,
temp
,
1
);
post_send
(
base
,
IBV_WR_RDMA_WRITE
);
temp
[
0
]
=
'A'
;
int
r_cnt
=
0
;
long
long
int
prev
=
-
1
;
struct
timeval
temp_time
;
double
s_time
,
e_time
;
double
avg_time
=
0
;
//memset(&base->mr_buf_addr, 0, base->mr_size);
union
object
*
sent_obj
=
(
union
object
*
)
base
->
mr_buf_addr
;
sent_obj
->
obj
.
key
=
0
;
while
(
temp
[
0
]
!=
'X'
)
{
sync_remote_qp
(
base
,
"T"
,
temp
,
1
);
if
(
temp
[
0
]
==
'W'
)
{
gettimeofday
(
&
temp_time
,
NULL
);
s_time
=
(
double
)
temp_time
.
tv_sec
*
1000
+
(
double
)
temp_time
.
tv_usec
/
1000
;
post_send
(
base
,
IBV_WR_RDMA_READ
);
if
(
poll_completion
(
base
))
D
(
err_msg
(
"IBV_WR_RDMA_READ"
,
true
,
base
));
while
(
prev
==
sent_obj
->
obj
.
key
);
dummy_loghandler
();
prev
=
sent_obj
->
obj
.
key
;
gettimeofday
(
&
temp_time
,
NULL
);
e_time
=
(
double
)
temp_time
.
tv_sec
*
1000
+
(
double
)
temp_time
.
tv_usec
/
1000
;
avg_time
+=
(
e_time
-
s_time
);
sync_remote_qp
(
base
,
"S"
,
temp
,
1
);
r_cnt
++
;
}
}
avg_time
/=
(
double
)
r_cnt
;
std
::
cout
<<
"Size of Object: "
<<
sizeof
(
union
object
)
<<
std
::
endl
;
std
::
cout
<<
"Objects received: "
<<
r_cnt
<<
std
::
endl
;
std
::
cout
<<
"Avg. RDMA Read time: "
<<
avg_time
<<
" ms"
<<
std
::
endl
;
//new
// post_send(base, IBV_WR_RDMA_READ);
// if(poll_completion(base))
// D(err_msg("IBV_WR_RDMA_READ", false, base));
// std::cout<<"Nic has"<<std::endl;
// strcpy(base->mr_buf_addr, "HELLO SMITTY\0");
// post_send(base, IBV_WR_SEND);
// if(poll_completion(base))
// D(err_msg("IBV_WR_SEND", true, base));
cleanup
(
base
);
return
0
;
}
Smit_MTP_RamCloud_Replication_Offload/server_nic.cpp
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <sys/time.h>
#include <unordered_map>
#include <infiniband/verbs.h>
#include "rdma_states.hpp"
#include "rdma_helper.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
/* Number of storage servers the NIC replicates to. */
const int num_servers = 2;
/* Declared but not referenced by main() in this file, which sets mode to 1 directly. */
const short int MODE = 2;
/* IPv4 addresses of the storage servers; main() connects to each one. */
char *SERVER_HOST[num_servers] = {"192.168.200.20", "192.168.200.50"};
/* IPv4 address of the client the NIC connects to. */
char *CLIENT_HOST = "192.168.200.40";
/* Address of the NIC itself; not referenced by main() in this file. */
char *SERVER_NIC = "192.168.200.21";
/* TCP side-channel ports of the servers and of the client. */
const int SERVER_PORT = 8080;
const int CLIENT_PORT = 8090;
/* Index into the verbs device list passed to open_dev(). */
const short int dev_num = 0;
/* Threshold used by populate_cache_meta(): an entry is marked invalid when
 * rand()/RAND_MAX is <= this value. */
const double err_fraction = 0.0;
/* Source metadata copied into obj_table by populate_cache_meta(). */
struct object_metadata obj_metas[cache_meta_size];
/* Key -> metadata lookup consulted for every object received from the client. */
std::unordered_map<long long int, struct object_metadata> obj_table;
void
populate_cache_meta
(
std
::
unordered_map
<
long
long
,
struct
object_metadata
>&
m
)
{
for
(
int
i
=
0
;
i
<
cache_meta_size
;
i
++
)
{
m
[
i
]
=
obj_metas
[
i
];
m
[
i
].
key
=
i
;
if
((
double
)
rand
()
/
RAND_MAX
<=
err_fraction
)
m
[
i
].
valid
=
false
;
else
m
[
i
].
valid
=
true
;
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
srand
(
time
(
NULL
));
char
*
temp
=
(
char
*
)
malloc
(
128
);
//nic connects to servers
struct
resource_base
*
server_base
[
num_servers
];
struct
ibv_port_attr
server_port_attr
[
num_servers
];
union
ibv_gid
server_gid
[
num_servers
];
for
(
int
i
=
0
;
i
<
num_servers
;
i
++
)
{
server_base
[
i
]
=
(
struct
resource_base
*
)
malloc
(
sizeof
(
struct
resource_base
));
init_resources
(
server_base
[
i
]);
server_base
[
i
]
->
mode
=
1
;
server_base
[
i
]
->
ib_port
=
1
;
server_base
[
i
]
->
gid_idx
=
1
;
server_base
[
i
]
->
server_name
=
SERVER_HOST
[
i
];
server_base
[
i
]
->
port
=
SERVER_PORT
;
open_dev
(
server_base
[
i
],
dev_num
);
allocate_pd
(
server_base
[
i
]);
register_mr
(
server_base
[
i
]);
init_cq
(
server_base
[
i
]);
init_qp
(
server_base
[
i
]);
if
(
ibv_query_port
(
server_base
[
i
]
->
ctx
,
server_base
[
i
]
->
ib_port
,
&
server_port_attr
[
i
]))
D
(
err_msg
(
"ibv_query_port"
,
true
,
server_base
[
i
]));
if
(
server_port_attr
[
i
].
state
!=
IBV_PORT_ACTIVE
)
D
(
err_msg
(
"IB PORT NOT ACTIVE"
,
true
,
server_base
[
i
]));
server_base
[
i
]
->
port_attr
=
&
server_port_attr
[
i
];
if
(
ibv_query_gid
(
server_base
[
i
]
->
ctx
,
server_base
[
i
]
->
ib_port
,
server_base
[
i
]
->
gid_idx
,
&
server_gid
[
i
]))
D
(
err_msg
(
"ibv_query_gid"
,
true
,
server_base
[
i
]));
memcpy
(
server_base
[
i
]
->
local_conn
->
gid
,
&
server_gid
[
i
],
16
);
connect_qp
(
server_base
[
i
]);
sync_remote_qp
(
server_base
[
i
],
"T"
,
temp
,
1
);
while
(
poll_completion
(
server_base
[
i
]));
//BAD PRACTICE BUT NEED THIS TEMPORARILY
//D(err_msg("IBV_WR_SEND", false, server_base[i]));
}
//nic connects to client
struct
resource_base
*
client_base
;
struct
ibv_port_attr
client_port_attr
;
union
ibv_gid
client_gid
;
client_base
=
(
struct
resource_base
*
)
malloc
(
sizeof
(
struct
resource_base
));
init_resources
(
client_base
);
client_base
->
mode
=
1
;
client_base
->
ib_port
=
1
;
client_base
->
gid_idx
=
1
;
client_base
->
server_name
=
CLIENT_HOST
;
client_base
->
port
=
CLIENT_PORT
;
open_dev
(
client_base
,
dev_num
);
allocate_pd
(
client_base
);
register_mr
(
client_base
);
init_cq
(
client_base
);
init_qp
(
client_base
);
if
(
ibv_query_port
(
client_base
->
ctx
,
client_base
->
ib_port
,
&
client_port_attr
))
D
(
err_msg
(
"ibv_query_port"
,
true
,
client_base
));
if
(
client_port_attr
.
state
!=
IBV_PORT_ACTIVE
)
D
(
err_msg
(
"IB PORT NOT ACTIVE"
,
true
,
client_base
));
client_base
->
port_attr
=
&
client_port_attr
;
if
(
ibv_query_gid
(
client_base
->
ctx
,
client_base
->
ib_port
,
client_base
->
gid_idx
,
&
client_gid
))
D
(
err_msg
(
"ibv_query_gid"
,
true
,
client_base
));
memcpy
(
client_base
->
local_conn
->
gid
,
&
client_gid
,
16
);
connect_qp
(
client_base
);
sync_remote_qp
(
client_base
,
"T"
,
temp
,
1
);
while
(
poll_completion
(
client_base
));
//BAD PRACTICE BUT NEED THIS TEMPORARILY
//D(err_msg("IBV_WR_SEND, client", false, client_base));
//client_base->mr_buf_addr = (char *) malloc(client_base->mr_size);
//sock_connect(client_base);
//char *tp = (char *) malloc(128);
//sync_remote_qp(client_base, "Hi from client\0", client_base->mr_buf_addr, 15);
//std::cout<<"CLIENT SENT"<<std::endl;
//std::cout<<client_base->mr_buf_addr<<std::endl;
//std::cout<<"NIC CONNECTED TO CLIENT"<<std::endl;
int
err_cnt
=
0
;
int
succ_cnt
=
0
;
populate_cache_meta
(
obj_table
);
//obj_table[0].valid = false;
union
object
sent_obj
;
enum
Status
send_succ
=
STATUS_OK
;
enum
Status
send_err
=
STATUS_WRONG_VERSION
;
struct
timeval
temp_time
;
double
s_time
,
e_time
;
double
avg_time_diff
=
0.0
;
int
ack_cnt
;
for
(
int
i
=
0
;
i
<
cache_meta_size
;
i
++
)
{
memset
(
&
sent_obj
,
0
,
sizeof
(
sent_obj
));
read_obj
(
client_base
,
temp
,
1
);
if
(
temp
[
0
]
==
'R'
)
post_send
(
client_base
,
IBV_WR_RDMA_READ
);
sync_remote_qp
(
client_base
,
"T"
,
temp
,
1
);
while
(
poll_completion
(
client_base
));
//BAD PRACTICE BUT NEED THIS TEMPORARILY
//D(err_msg("client obj poll completion", false, client_base));
memcpy
((
void
*
)
&
sent_obj
,
(
void
*
)
client_base
->
mr_buf_addr
,
sizeof
(
sent_obj
));
//std::cout<<"Object got with key: "<<sent_obj.obj.key<<std::endl;
if
(
obj_table
[
sent_obj
.
obj
.
key
].
valid
)
{
for
(
resource_base
*
server
:
server_base
)
memcpy
((
void
*
)
server
->
mr_buf_addr
,
(
void
*
)
&
sent_obj
,
sizeof
(
sent_obj
));
gettimeofday
(
&
temp_time
,
NULL
);
s_time
=
((
double
)
temp_time
.
tv_sec
*
1000
+
(
double
)
temp_time
.
tv_usec
/
1000
);
//sync_remote_qp(server_base, "W", temp, 1); //prepare
//post_send(server_base, IBV_WR_RDMA_WRITE);
// if(poll_completion(server_base))
// D(err_msg("IBV_WR_RDMA_WRITE", true, server_base));
ack_cnt
=
0
;
for
(
resource_base
*
server
:
server_base
)
{
sync_remote_qp
(
server
,
"W"
,
temp
,
1
);
}
for
(
resource_base
*
server
:
server_base
)
{
sync_remote_qp
(
server
,
"T"
,
temp
,
1
);
//ack
if
(
temp
[
0
]
==
'S'
)
ack_cnt
++
;
temp
[
0
]
=
'T'
;
}
if
(
ack_cnt
==
num_servers
)
{
gettimeofday
(
&
temp_time
,
NULL
);
e_time
=
((
double
)
temp_time
.
tv_sec
*
1000
+
(
double
)
temp_time
.
tv_usec
/
1000
);
avg_time_diff
+=
(
e_time
-
s_time
);
succ_cnt
++
;
send_obj
(
client_base
,
(
char
*
)
&
send_succ
,
sizeof
(
send_succ
));
}
//sync_remote_qp(server_base, "T", temp, 1); //ack
// if(temp[0]=='S') {
// gettimeofday(&temp_time, NULL);
// e_time = ((double)temp_time.tv_sec*1000 + (double)temp_time.tv_usec/1000);
// avg_time_diff += (e_time - s_time);
// succ_cnt++;
// send_obj(client_base, (char *)&send_succ, sizeof(send_succ));
// }
}
else
{
send_obj
(
client_base
,
(
char
*
)
&
send_err
,
sizeof
(
send_err
));
err_cnt
++
;
}
}
avg_time_diff
/=
(
double
)
succ_cnt
;
std
::
cout
<<
"Errored requests: "
<<
err_cnt
<<
std
::
endl
;
std
::
cout
<<
"Successful requests: "
<<
succ_cnt
<<
std
::
endl
;
std
::
cout
<<
"Nic to Host obj transfer time: "
<<
avg_time_diff
<<
" ms"
<<
std
::
endl
;
for
(
resource_base
*
server
:
server_base
)
sync_remote_qp
(
server
,
"X"
,
temp
,
1
);
//for(int i=0; i<1000; i++) sync_remote_qp(client_base, "T", tp, 1);
for
(
resource_base
*
server
:
server_base
)
cleanup
(
server
);
cleanup
(
client_base
);
return
0
;
}
Smit_MTP_RamCloud_Replication_Offload/testing/request_test_host.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <stdio.h>
#include <string>
#include <vector>
#include "../include/cli_api.hpp"
#include "../include/common.hpp"
#include "../include/client_functions.hpp"
#include "../include/connection_pool.hpp"
#include "../include/dispatcher.hpp"
#include "../include/thread_pool.hpp"
#include "../config/read_config.hpp"
using
namespace
std
;
/* Peer to connect to: the NIC. */
vector<string> conn_addrs = {
    machine_allocation_ips["ub-04-nic"]
};
/* Default port; overwritten from the config file in main(). */
int conn_port = 8888;

/*
 * Host side of the transport smoke test: connect to the NIC, swap a few
 * TCP messages, and perform one one-sided RDMA read in between.
 *
 * Fixes: the failure message after one_sided_read() said "write"; the
 * last receive buffer was never freed; a NULL receive buffer is no longer
 * passed to %s; %p now gets a void pointer.
 * NOTE(review): recv_data() is assumed to hand back a malloc'd buffer,
 * since the original code already passes it to free() - confirm.
 */
int main() {
    string tcp_test2("Hey this is host");
    string test_string("Hello this is host");

    Params parameters("../config/server_config.conf");
    parameters.read_params();
    parameters.print_vals();
    debug = parameters.debug;
    analyze = parameters.analyze;
    max_packet_size_bytes = parameters.max_packet_size_bytes;
    max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
    conn_port = parameters.conn_port;

    Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
    dispatcher.add_connection(conn_addrs[0], conn_port, parameters);
    printf("Connected to nic\n");
    //printf("Size of conn pool: %d\n", dispatcher.conn_pool->rdma_connection_pool.size());
    RDMA_Transport *rdma_transport = dispatcher.conn_pool->rdma_connection_pool[0];
    printf("%p\n", (void *)rdma_transport);
    TCP_Transport *tcp_transport = rdma_transport->get_tcp_conn();

    //send tcp
    printf("Now sending tcp\n");
    tcp_transport->set_mr(test_string, test_string.size());
    tcp_transport->send_data();

    //recv tcp
    char *buf = NULL;
    tcp_transport->recv_data(&buf);
    printf("Received: %s\n", buf ? buf : "(null)");

    //one sided read
    if (rdma_transport->one_sided_read()) {
        perror("One sided read error");
        free(buf);
        return -1;
    }
    //poll cq
    if (rdma_transport->poll_cq()) {
        perror("CQ err");
        free(buf);
        return -1;
    }
    printf("Received via RDMA: %s\n", rdma_transport->get_mr_addr());

    //recv tcp
    free(buf);
    buf = NULL;
    tcp_transport->recv_data(&buf);
    printf("Received: %s\n", buf ? buf : "(null)");

    //set mr
    rdma_transport->copy_to_mr(test_string.c_str());

    //send tcp
    tcp_transport->set_mr(tcp_test2, tcp_test2.size());
    tcp_transport->send_data();

    //recv tcp
    free(buf);
    buf = NULL;
    tcp_transport->recv_data(&buf);
    printf("Received: %s\n", buf ? buf : "(null)");

    free(buf);
    return 0;
}
Smit_MTP_RamCloud_Replication_Offload/testing/request_test_nic.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <stdio.h>
#include <string>
#include <vector>
#include "../include/cli_api.hpp"
#include "../include/common.hpp"
#include "../include/client_functions.hpp"
#include "../include/connection_pool.hpp"
#include "../include/dispatcher.hpp"
#include "../include/thread_pool.hpp"
#include "../config/read_config.hpp"
using
namespace
std
;
/* Address entry for the host; main() passes an empty address to
 * add_connection() and does not read this vector. */
vector<string> conn_addrs = {
    machine_allocation_ips["ub-05"]
};
/* Default port; overwritten from the config file in main(). */
int conn_port = 8888;

/*
 * NIC side of the transport smoke test: add a connection with an empty
 * address, swap TCP messages with the host, and perform one one-sided
 * RDMA read.
 *
 * Fixes: the failure message after one_sided_read() said "write"; the
 * second receive buffer was never freed; a NULL receive buffer is no
 * longer passed to %s.
 * NOTE(review): recv_data() is assumed to hand back a malloc'd buffer,
 * since the original code already passes it to free() - confirm.
 */
int main() {
    string tcp_test_str("Hello this is tcp conn");
    string test_string("Hello this is NIC");

    Params parameters("../config/nic_config.conf");
    parameters.read_params();
    parameters.print_vals();
    debug = parameters.debug;
    analyze = parameters.analyze;
    max_packet_size_bytes = parameters.max_packet_size_bytes;
    max_cq_poll_timeout = parameters.rdma_cq_poll_timeout_ms;
    conn_port = parameters.conn_port;

    Dispatcher dispatcher(parameters.transport_type, parameters.num_threads);
    dispatcher.add_connection(string(), conn_port, parameters);
    printf("Connected to host\n");
    RDMA_Transport *rdma_transport = dispatcher.conn_pool->rdma_connection_pool[0];
    TCP_Transport *tcp_transport = rdma_transport->get_tcp_conn();

    char *buf = NULL;
    //recv data
    tcp_transport->recv_data(&buf);
    printf("Received: %s\n", buf ? buf : "(null)");

    //set mr
    rdma_transport->copy_to_mr(test_string.c_str());

    //send tcp
    tcp_transport->set_mr(tcp_test_str, tcp_test_str.size());
    tcp_transport->send_data();

    //send tcp
    tcp_transport->set_mr(tcp_test_str, tcp_test_str.size());
    tcp_transport->send_data();

    //recv tcp
    free(buf);
    buf = NULL;
    tcp_transport->recv_data(&buf);

    //one sided read
    if (rdma_transport->one_sided_read()) {
        perror("One sided read error");
        free(buf);
        return -1;
    }
    //poll cq
    if (rdma_transport->poll_cq()) {
        perror("CQ err");
        free(buf);
        return -1;
    }
    printf("Received via RDMA: %s\n", rdma_transport->get_mr_addr());

    //send tcp
    tcp_transport->set_mr(tcp_test_str, tcp_test_str.size());
    tcp_transport->send_data();

    free(buf);
    return 0;
}
Smit_MTP_RamCloud_Replication_Offload/threaded_client.cc
0 → 100644
View file @
d2d47b86
#include <chrono>
#include <iostream>
#include <thread>
#include <stdio.h>
#include <string>
#include <vector>
#include "include/cli_api.hpp"
#include "include/common.hpp"
#include "include/client_functions.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
using
namespace
std
;
// Requests per second; main() overwrites this with the configured value.
double arrival_rate = 0.0;
// Address list for outgoing connections (a single entry).
vector<string> conn_addrs = {
    machine_allocation_ips["ub-04-nic"] //the nic
};
// Default port; replaced by the configured value in main().
int conn_port = 8888;
// Key and value payloads, filled once in main() and reused for every request.
string key, value;
// Number of key bytes generated in main() and sent with each request.
size_t key_length = 64;
// Role marker for this process.
enum Entity self_id = CLIENT;
// Sender thread body: issues 500 write requests on the first RDMA connection,
// pausing 1/arrival_rate seconds between them.
//
// dispatcher  - Dispatcher copy whose conn_pool supplies the connection.
// value_size  - number of bytes of the global `value` sent per request.
// Returns NULL always.
//
// NOTE(review): response_thread waits for max_req responses while this loop
// sends a fixed 500 -- confirm the two are meant to match.
void *request_thread(Dispatcher dispatcher, size_t value_size) {
    // A non-positive rate would give an infinite or negative interval; send
    // back-to-back in that case instead of handing such a value to sleep_for.
    const double gap_seconds = (arrival_rate > 0.0) ? 1 / (arrival_rate) : 0.0;
    chrono::duration<double> interval(gap_seconds);

    for (int i = 0; i < 500; i++) {
        //put out a request
        write_send_request(dispatcher.conn_pool->rdma_connection_pool[0],
                           key.c_str(), key_length,
                           value.c_str(), value_size);
        //sleep for interval
        this_thread::sleep_for(interval);
    }
    return NULL;
}
// Receiver thread body: collects max_req responses, one write_get_response
// call per iteration, always on the first RDMA connection of the pool.
//
// dispatcher - Dispatcher copy whose conn_pool supplies the connection.
// Returns NULL always.
void *response_thread(Dispatcher dispatcher) {
    int handled = 0;
    while (handled < max_req) {
        write_get_response(dispatcher.conn_pool->rdma_connection_pool[0]);
        ++handled;
    }
    return NULL;
}
int
main
()
{
srand
(
time
(
NULL
));
char
*
ipstr
=
(
char
*
)
malloc
(
max_ip_cmd_len
);
vector
<
string
>
tokens
;
Params
parameters
(
"config/threaded_client.conf"
);
parameters
.
read_params
();
debug
=
parameters
.
debug
;
analyze
=
parameters
.
analyze
;
max_packet_size_bytes
=
parameters
.
max_packet_size_bytes
;
max_cq_poll_timeout
=
parameters
.
rdma_cq_poll_timeout_ms
;
conn_port
=
parameters
.
conn_port
;
arrival_rate
=
parameters
.
arrival_rate
;
if
(
debug
)
{
parameters
.
print_vals
();
}
// make key
for
(
int
i
=
0
;
i
<
key_length
;
i
++
)
{
key
.
push_back
((
char
)(
rand
()
%
256
));
}
for
(
int
i
=
0
;
i
<
parameters
.
rdma_mr_size_bytes
-
150
;
i
++
)
{
value
.
push_back
((
char
)(
i
%
256
));
}
Dispatcher
dispatcher
(
parameters
.
transport_type
,
parameters
.
num_threads
);
for
(
string
conn_addr
:
conn_addrs
)
{
//dispatcher.add_connection(conn_addr, conn_port);
dispatcher
.
add_connection
(
conn_addrs
[
0
],
conn_port
,
parameters
);
if
(
debug
)
{
dispatcher
.
conn_pool
->
rdma_connection_pool
.
back
()
->
check_rdma_onesided
();
}
}
if
(
debug
)
{
printf
(
"Connected
\n
"
);
}
thread
req_thrd
(
request_thread
,
dispatcher
,
parameters
.
rdma_mr_size_bytes
-
150
);
thread
resp_thrd
(
response_thread
,
dispatcher
);
req_thrd
.
join
();
resp_thrd
.
join
();
if
(
analyze
)
{
for
(
chrono
::
duration
<
double
>
d
:
request_queue_wait_time
)
{
request_queue_wait_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
response_buffer_wait_time
)
{
response_buffer_wait_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
send_queue_wait_time
)
{
send_queue_wait_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
client_rtt_time
)
{
client_rtt_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
rdma_one_sided_read_time
)
{
rdma_one_sided_read_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
rdma_one_sided_write_time
)
{
rdma_one_sided_write_time_sum
+=
d
.
count
();
}
cout
<<
"Set Arrival Rate: "
<<
parameters
.
arrival_rate
<<
" requests/second"
<<
endl
;
cout
<<
"Request Queue Wait Time Avg: "
<<
request_queue_wait_time_sum
/
request_queue_wait_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Response Buffer Wait Time Avg: "
<<
response_buffer_wait_time_sum
/
response_buffer_wait_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Send Queue Wait Time Avg: "
<<
send_queue_wait_time_sum
/
send_queue_wait_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Avg RDMA one sided read time: "
<<
rdma_one_sided_read_time_sum
/
rdma_one_sided_read_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Avg RDMA one sided write time: "
<<
rdma_one_sided_write_time_sum
/
rdma_one_sided_write_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Client RTT Avg: "
<<
client_rtt_time_sum
/
client_rtt_time
.
size
()
<<
" seconds"
<<
endl
;
}
return
0
;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/threaded_client1.cc
0 → 100644
View file @
d2d47b86
#include <chrono>
#include <iostream>
#include <thread>
#include <random>
#include <stdio.h>
#include <string>
#include <vector>
#include "include/cli_api.hpp"
#include "include/common.hpp"
#include "include/client_functions.hpp"
#include "include/connection_pool.hpp"
#include "include/dispatcher.hpp"
#include "include/thread_pool.hpp"
#include "config/read_config.hpp"
#include "include/thread_functions.hpp"
using
namespace
std
;
// Requests per second; main() overwrites this with the configured value.
double arrival_rate = 0.0;
// Address list for outgoing connections (a single entry).
vector<string> conn_addrs = {
    machine_allocation_ips["ub-04-nic"] //the nic
};
// How many connections main() opens, and how many worker threads it starts.
int num_connections = 4;
// Default port; replaced by the configured value in main().
int conn_port = 8888;
// Key and value payloads, filled once in main() and copied into every request.
string key, value;
// Number of key bytes generated in main() and copied into each packet.
size_t key_length = 64;
// Role marker for this process.
enum Entity self_id = CLIENT;
// Producer thread body: builds 500 WRITE requests and enqueues each on the
// global request_queue, pausing 1/arrival_rate seconds between them.
//
// Packet layout (zero-filled first):
//   [Common_Request header][key, key_length bytes][0][value, value_size bytes][0]
//
// dispatcher     - not used by this function; kept for the thread signature.
// value_size     - number of bytes of the global `value` copied per packet.
// transport_type - stored in every job_context.
// Returns NULL always (also when a packet allocation fails).
//
// The packet and the job are not released here; presumably whoever dequeues
// the job takes ownership -- TODO confirm.
void *request_manager(Dispatcher dispatcher, size_t value_size, uint8_t transport_type) {
    // A non-positive rate would give an infinite or negative interval; send
    // back-to-back in that case instead of handing such a value to sleep_for.
    const double gap_seconds = (arrival_rate > 0.0) ? 1 / (arrival_rate) : 0.0;
    chrono::duration<double> interval(gap_seconds);

    const size_t payload_size = key_length + 1 + value_size + 1;
    const size_t final_size = sizeof(Common_Request) + payload_size;

    for (int i = 0; i < 500; i++) {
        char *packet = (char *)malloc(final_size);
        if (packet == NULL) {
            // The original wrote through the NULL pointer on allocation failure.
            perror("request_manager: unable to allocate request packet");
            return NULL;
        }
        memset(packet, 0, final_size);
        memcpy(packet + sizeof(Common_Request), key.c_str(), key_length);
        memcpy(packet + sizeof(Common_Request) + key_length + 1, value.c_str(), value_size);

        // The header lives at the start of the same allocation.
        Common_Request *cr = (Common_Request *)packet;
        cr->opcode = WRITE;
        cr->service_type = MASTER_SERVICE;
        cr->type = TYPE_REQUEST;
        cr->req.w_request.common.opcode = WRITE;
        cr->req.w_request.length = payload_size;

        job_context *job = new job_context();
        job->opcode = WRITE;
        job->job_type = TYPE_REQUEST;
        job->request = cr;
        job->request_packet = packet;
        job->service_type = MASTER_SERVICE;
        job->transport_type = transport_type;
        job->job_post_time = chrono::steady_clock::now();
        request_queue->enqueue(job);

        this_thread::sleep_for(interval);
    }
    return NULL;
}
int
main
()
{
srand
(
time
(
NULL
));
random_device
rd
;
mt19937
gen
(
rd
());
uniform_int_distribution
<>
distr
(
1
,
255
);
char
*
ipstr
=
(
char
*
)
malloc
(
max_ip_cmd_len
);
vector
<
string
>
tokens
;
thread
worker_threads
[
num_connections
];
Params
parameters
(
"config/threaded_client.conf"
);
parameters
.
read_params
();
debug
=
parameters
.
debug
;
analyze
=
parameters
.
analyze
;
max_packet_size_bytes
=
parameters
.
max_packet_size_bytes
;
max_cq_poll_timeout
=
parameters
.
rdma_cq_poll_timeout_ms
;
conn_port
=
parameters
.
conn_port
;
arrival_rate
=
parameters
.
arrival_rate
;
if
(
debug
)
{
parameters
.
print_vals
();
}
Dispatcher
dispatcher
(
parameters
.
transport_type
,
parameters
.
num_threads
,
client_worker_function
);
for
(
int
i
=
0
;
i
<
num_connections
;
i
++
)
{
dispatcher
.
add_connection
(
conn_addrs
[
0
],
conn_port
,
parameters
);
if
(
debug
)
{
printf
(
"Connection #%d made
\n
"
,
i
);
}
if
(
debug
)
{
dispatcher
.
conn_pool
->
rdma_connection_pool
.
back
()
->
check_rdma_onesided
();
}
}
if
(
debug
)
{
printf
(
"Connected
\n
"
);
}
for
(
int
i
=
0
;
i
<
key_length
;
i
++
)
{
key
.
push_back
((
char
)(
distr
(
gen
)));
}
for
(
int
i
=
0
;
i
<
parameters
.
rdma_mr_size_bytes
-
150
;
i
++
)
{
value
.
push_back
((
char
)(
distr
(
gen
)));
}
for
(
int
i
=
0
;
i
<
num_connections
;
i
++
)
{
worker_threads
[
i
]
=
thread
(
client_worker_function
,
dispatcher
.
conn_pool
->
rdma_connection_pool
[
i
]);
}
thread
sender_thread
(
request_manager
,
dispatcher
,
parameters
.
rdma_mr_size_bytes
-
150
,
parameters
.
transport_type
);
sender_thread
.
join
();
if
(
analyze
)
{
for
(
chrono
::
duration
<
double
>
d
:
request_queue_wait_time
)
{
request_queue_wait_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
response_buffer_wait_time
)
{
response_buffer_wait_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
send_queue_wait_time
)
{
send_queue_wait_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
client_rtt_time
)
{
client_rtt_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
rdma_one_sided_read_time
)
{
rdma_one_sided_read_time_sum
+=
d
.
count
();
}
for
(
chrono
::
duration
<
double
>
d
:
rdma_one_sided_write_time
)
{
rdma_one_sided_write_time_sum
+=
d
.
count
();
}
cout
<<
"Set Arrival Rate: "
<<
parameters
.
arrival_rate
<<
" requests/second"
<<
endl
;
cout
<<
"Request Queue Wait Time Avg: "
<<
request_queue_wait_time_sum
/
request_queue_wait_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Response Buffer Wait Time Avg: "
<<
response_buffer_wait_time_sum
/
response_buffer_wait_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Send Queue Wait Time Avg: "
<<
send_queue_wait_time_sum
/
send_queue_wait_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Avg RDMA one sided read time: "
<<
rdma_one_sided_read_time_sum
/
rdma_one_sided_read_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Avg RDMA one sided write time: "
<<
rdma_one_sided_write_time_sum
/
rdma_one_sided_write_time
.
size
()
<<
" seconds"
<<
endl
;
cout
<<
"Client RTT Avg: "
<<
client_rtt_time_sum
/
client_rtt_time
.
size
()
<<
" seconds"
<<
endl
;
}
return
0
;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/transport_api/ClientTest.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <string>
#include <chrono>
#include <thread>
#include "transport_config.hpp"
#include "common.hpp"
using
namespace
std
;
// Client half of the RDMA bring-up test: connects to a fixed peer address,
// runs the RDMA setup, waits 5 ms and prints the local memory-region buffer.
//
// Returns 0 on success, -1 when rdma_setup() fails.
int main() {
    int port = 8888;
    RDMA_Transport transport("192.168.200.20", port);
    transport.set_mr_size(512);
    if (transport.rdma_setup()) {
        perror("RDMA setup failed");
        // Stop here (as ServerTest does): the code below reads the memory
        // region that a failed setup may never have created.
        return -1;
    }
    RDMA_config *config = transport.get_config();
    // Short pause before reading the buffer; presumably gives the peer time
    // to finish its one-sided write -- TODO confirm.
    this_thread::sleep_for(chrono::milliseconds(5));
    cout << config->mr.mr_buf_addr << endl;
    return 0;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/transport_api/RDMATest.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include "transport_config.hpp"
using
namespace
std
;
int
main
()
{
RDMA_Transport
r
;
r
.
rdma_open_dev
();
r
.
rdma_allocate_pd
();
r
.
rdma_register_mr
();
r
.
rdma_init_cq
();
r
.
rdma_init_qp
();
return
0
;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/transport_api/ServerTest.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <string>
#include "transport_config.hpp"
#include "common.hpp"
using
namespace
std
;
int
main
()
{
string
buf
=
"hi from server"
;
int
port
=
8888
;
RDMA_Transport
transport
(
""
,
port
);
transport
.
set_mr_size
(
512
);
if
(
transport
.
rdma_setup
())
{
perror
(
"RDMA Setup failed"
);
return
-
1
;
}
if
(
transport
.
copy_to_mr
(
buf
.
c_str
()))
{
perror
(
"Unable to copy to MR"
);
return
-
1
;
}
RDMA_config
*
config
=
transport
.
get_config
();
cout
<<
config
->
mr
.
mr_buf_addr
<<
endl
;
if
(
transport
.
one_sided_write
())
{
perror
(
"Unable to issue RDMA Write"
);
}
return
0
;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/transport_api/TCPTestClient.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <string>
#include "transport_config.hpp"
using
namespace
std
;
// TCP loopback client test: connects to 127.0.0.1:8888, sends two bytes,
// receives two bytes and prints them.
//
// Returns 0 on success, -1 when the connection cannot be set up.
int main() {
    // Zero-filled so the two received bytes are always NUL-terminated when
    // printed; the buffer used to be uninitialised.
    char buf[100] = {0};
    TCP_Transport t("127.0.0.1", 8888);
    const int setup_rc = t.setup();
    cout << setup_rc << endl;
    if (setup_rc) {
        // Do not claim "Connected..." or touch the socket after a failed setup.
        return -1;
    }
    cout << "Connected..." << endl;
    t.send_data("he\0", 2);
    t.recv_data(buf, 2);
    cout << buf;
    t.close_conn();
    return 0;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/transport_api/TCPTestServer.cc
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <string>
#include "transport_config.hpp"
using
namespace
std
;
// TCP loopback server test: accepts one connection on port 8888, receives
// two bytes, prints them and answers with two bytes.
//
// Returns 0 on success, -1 when the connection cannot be set up.
int main() {
    // Zero-filled so the two received bytes are always NUL-terminated when
    // printed; the buffer used to be uninitialised.
    char buf[100] = {0};
    TCP_Transport t(8888);
    const int setup_rc = t.setup();
    cout << setup_rc << endl;
    if (setup_rc) {
        // Do not claim "Connected..." or touch the socket after a failed setup.
        return -1;
    }
    cout << "Connected..." << endl;
    t.recv_data(buf, 2);
    cout << buf;
    t.send_data("yo\0", 2);
    t.close_conn();
    return 0;
}
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/transport_api/transport_config.cc
0 → 100644
View file @
d2d47b86
#ifndef __TRANSPORT_CONFIG_CC__
#define __TRANSPORT_CONFIG_CC__
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <string>
#include <chrono>
#include <infiniband/verbs.h>
#include "../include/common.hpp"
#include "transport_config.hpp"
namespace
chrono
=
std
::
chrono
;
// Default constructor: starts with no staged send buffer and a
// default-constructed TCP_config owned by this object.
TCP_Transport::TCP_Transport() {
    mr_size = 0;
    mr = NULL;
    config = new TCP_config();
}
// Constructs a transport whose TCP_config is built from (a, b); starts with
// no staged send buffer. Callers in this code base pass an address string
// and a port number.
TCP_Transport::TCP_Transport(std::string a, int b) {
    mr_size = 0;
    mr = NULL;
    config = new TCP_config(a, b);
}
// Constructs a transport whose TCP_config is built from the single integer b;
// starts with no staged send buffer. Callers in this code base pass a port.
TCP_Transport::TCP_Transport(int b) {
    mr_size = 0;
    mr = NULL;
    config = new TCP_config(b);
}
// Constructs a transport that owns a private copy of the given TCP_config;
// starts with no staged send buffer.
TCP_Transport::TCP_Transport(TCP_config &t) {
    mr_size = 0;
    mr = NULL;
    config = new TCP_config(t);
}
// Releases the owned configuration and whatever buffer is still staged in mr.
//
// NOTE(review): set_mr(char *, size_t) stores the caller's pointer as-is, so
// a buffer staged that way and never sent is freed here as well -- confirm
// callers expect to hand over ownership.
TCP_Transport::~TCP_Transport() {
    delete config;
    config = NULL;
    // free(NULL) is a no-op, so no separate NULL test is needed.
    free(mr);
    mr = NULL;
}
std
::
string
TCP_Transport
::
get_ip
()
{
return
this
->
config
->
ip
;
}
int
TCP_Transport
::
get_port
()
{
return
this
->
config
->
port
;
}
void
TCP_Transport
::
set_ip
(
std
::
string
addr
)
{
this
->
config
->
ip
=
addr
;
return
;
}
void
TCP_Transport
::
set_ip
(
char
*
addr
)
{
this
->
config
->
ip
=
std
::
string
(
addr
);
return
;
}
void
TCP_Transport
::
set_port
(
int
port
)
{
this
->
config
->
port
=
port
;
return
;
}
void
TCP_Transport
::
set_mr
(
char
*
buf_addr
,
size_t
buf_size
)
{
// if(this->mr!=NULL) {
// free(this->mr);
// this->mr = NULL;
// this->mr_size = 0;
// }
this
->
mr
=
buf_addr
;
this
->
mr_size
=
buf_size
;
}
// Stages a heap copy of `str` for the next send_data() call.
//
// str      - source bytes.
// buf_size - size of the staged buffer. When it exceeds the string's storage
//            (length plus terminator) the extra bytes are zero-filled; the
//            original memcpy read past the end of the string in that case.
//
// If the allocation fails nothing is staged (mr == NULL, mr_size == 0), which
// makes the following send_data() return -1. Any previously staged pointer is
// overwritten, not freed, exactly as before.
void TCP_Transport::set_mr(std::string str, size_t buf_size) {
    char *temp = (char *)malloc(buf_size);
    if (temp == NULL) {
        this->mr = NULL;
        this->mr_size = 0;
        return;
    }
    // c_str() guarantees size() + 1 readable bytes; never copy more than that.
    const size_t available = str.size() + 1;
    const size_t to_copy = (buf_size < available) ? buf_size : available;
    memcpy(temp, str.c_str(), to_copy);
    if (to_copy < buf_size) {
        memset(temp + to_copy, 0, buf_size - to_copy);
    }
    this->mr = temp;
    this->mr_size = buf_size;
}
// Creates the local TCP socket and, when no peer address is configured
// (server role), binds it to INADDR_ANY on the configured port.
//
// Returns 0 on success, -1 when the port is negative, the socket cannot be
// created, or bind fails.
int TCP_Transport::make_socket() {
    TCP_config *config = this->config;
    if (config->port < 0) {
        //error
        return -1;
    }

    //setup the socket
    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(config->port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    config->local_sock_fd = socket(host_addr.sin_family, SOCK_STREAM, 0);
    if (config->local_sock_fd < 0) {
        //error
        return -1;
    }

    if (config->ip.empty()) {
        //server
        if (bind(config->local_sock_fd, (struct sockaddr *)&host_addr, sizeof(host_addr))) {
            //error
            close(config->local_sock_fd);
            // Forget the closed descriptor so close_conn() does not close it
            // a second time.
            config->local_sock_fd = -1;
            return -1;
        }
    }
    return 0;
}
// Puts the already-created local socket into listening mode (backlog 64).
// Returns 0 on success, -1 when no socket exists or listen() fails.
int TCP_Transport::start_listen() {
    TCP_config *tcp_config = this->config;
    if (tcp_config->local_sock_fd < 0) {
        perror("start_listen: No socket created");
        return -1;
    }
    const int listen_rc = listen(tcp_config->local_sock_fd, 64);
    if (listen_rc < 0) {
        perror("start_listen: listen error");
        return -1;
    }
    return 0;
}
// Creates the local socket via make_socket() and then puts it into listening
// mode (backlog 64). Returns 0 on success, -1 if either step fails.
int TCP_Transport::make_socket_listen() {
    if (this->make_socket() != 0) {
        perror("make_socket_listen: Unable to create socket");
        return -1;
    }
    const int listen_rc = listen(this->config->local_sock_fd, 64);
    if (listen_rc < 0) {
        perror("make_socket_listen: Listen error");
        return -1;
    }
    return 0;
}
// Accepts one incoming connection on the local socket and stores the new
// descriptor in config->conn_fd.
//
// Returns 0 on success, -1 when listen() or accept() fails. The function is
// declared to return int but previously had no return statement at all,
// which is undefined behaviour for any caller that reads the result.
int TCP_Transport::accept_conn() {
    if (listen(config->local_sock_fd, 1) < 0) {
        perror("accept_conn: listen error");
        return -1;
    }
    config->conn_fd = accept(config->local_sock_fd, NULL, NULL);
    if (config->conn_fd < 0) {
        perror("accept_conn: accept error");
        return -1;
    }
    return 0;
}
// Connects the already-created local socket to the configured ip and port.
// On success the connected descriptor is also stored as config->conn_fd.
//
// Returns 0 on success; -1 when the configured address is not a valid IPv4
// string or connect() fails. After a failed connect the socket is closed.
int TCP_Transport::make_conn() {
    TCP_config *config = this->config;

    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(config->port);
    // inet_aton returns 0 for an unparsable address; the result used to be
    // ignored, which silently left INADDR_ANY as the destination.
    if (inet_aton(config->ip.c_str(), &host_addr.sin_addr) == 0) {
        return -1;
    }

    if (connect(config->local_sock_fd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        //error
        close(config->local_sock_fd);
        // Forget the closed descriptor so close_conn() does not close it again.
        config->local_sock_fd = -1;
        return -1;
    }
    config->conn_fd = config->local_sock_fd;
    return 0;
}
// One-shot connection setup.
//
// With an empty configured ip this acts as the server: bind to INADDR_ANY on
// the configured port, listen and block until one peer is accepted. Otherwise
// it acts as the client and connects to ip:port. Either way config->conn_fd
// holds the connected descriptor on success.
//
// Returns 0 on success, -1 on any failure. On failure the socket created here
// is closed and local_sock_fd is reset to -1.
int TCP_Transport::setup() {
    TCP_config *config = this->config;

    //sanity check
    if (config->port < 0) {
        perror("TCP_Transport::setup: Port not set");
        return -1;
    }

    //setup tcp
    struct sockaddr_in host_addr;
    memset(&host_addr, 0, sizeof(host_addr));
    host_addr.sin_family = AF_INET;
    host_addr.sin_port = htons(config->port);
    host_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    config->local_sock_fd = socket(host_addr.sin_family, SOCK_STREAM, 0);
    if (config->local_sock_fd < 0) {
        perror("TCP_Transport::setup: unable to get socket");
        return -1;
    }

    if (config->ip.empty()) {
        //this is the server
        if (bind(config->local_sock_fd, (struct sockaddr *)&host_addr, sizeof(host_addr))) {
            perror("TCP_Transport::setup:bind: unable to bind");
            close(config->local_sock_fd);
            config->local_sock_fd = -1;
            return -1;
        }
        // listen/accept results used to be ignored, so a failed accept still
        // reported success with conn_fd == -1.
        if (listen(config->local_sock_fd, 1) < 0) {
            perror("TCP_Transport::setup:listen: unable to listen");
            close(config->local_sock_fd);
            config->local_sock_fd = -1;
            return -1;
        }
        config->conn_fd = accept(config->local_sock_fd, NULL, NULL);
        if (config->conn_fd < 0) {
            perror("TCP_Transport::setup:accept: unable to accept");
            close(config->local_sock_fd);
            config->local_sock_fd = -1;
            return -1;
        }
        return 0;
    }

    //this is the client
    if (inet_aton(config->ip.c_str(), &host_addr.sin_addr) == 0) {
        // inet_aton does not set errno, so perror would be misleading here.
        fprintf(stderr, "TCP_Transport::setup: invalid IPv4 address\n");
        close(config->local_sock_fd);
        config->local_sock_fd = -1;
        return -1;
    }
    if (connect(config->local_sock_fd, (struct sockaddr *)&host_addr, sizeof(host_addr)) < 0) {
        perror("TCP_Transport::setup:connect: unable to connect");
        close(config->local_sock_fd);
        config->local_sock_fd = -1;
        return -1;
    }
    config->conn_fd = config->local_sock_fd;
    return 0;
}
// Sends the first `size` bytes of `str` on the connected socket, looping
// until everything has been written.
//
// Returns 0 when all bytes were sent, -1 on a send() error or a negative
// `size`. The caller must ensure `size` does not exceed the string's storage.
int TCP_Transport::send_data(std::string str, int size) {
    if (size < 0) {
        // A negative size used to wrap to a huge unsigned byte count.
        return -1;
    }
    const char *buf = str.c_str();
    size_t remaining = (size_t)size;
    size_t offset = 0;
    while (remaining > 0) {
        // ssize_t keeps the -1 error value; the result used to be stored in a
        // size_t and compared with -1 through an implicit conversion.
        const ssize_t sent = send(this->config->conn_fd, buf + offset, remaining, 0);
        if (sent == -1) {
            //error
            return -1;
        }
        remaining -= (size_t)sent;
        offset += (size_t)sent;
    }
    return 0;
}
// Sends `size` bytes starting at `buf` on the connected socket, looping until
// everything has been written.
//
// Returns 0 when all bytes were sent, -1 on a send() error or a negative `size`.
int TCP_Transport::send_data(char *buf, int size) {
    if (size < 0) {
        // A negative size used to wrap to a huge unsigned byte count.
        return -1;
    }
    size_t remaining = (size_t)size;
    size_t offset = 0;
    while (remaining > 0) {
        // ssize_t keeps the -1 error value; the result used to be stored in a
        // size_t and compared with -1 through an implicit conversion.
        const ssize_t sent = send(this->config->conn_fd, (void *)(buf + offset), remaining, 0);
        if (sent == -1) {
            //error
            return -1;
        }
        remaining -= (size_t)sent;
        offset += (size_t)sent;
    }
    return 0;
}
// Sends the buffer staged by set_mr() and then forgets it.
//
// Returns -1 when nothing is staged, otherwise the result of
// send_data(char *, int).
//
// NOTE(review): the staged pointer is dropped without being freed. For
// buffers allocated by set_mr(std::string, size_t) that looks like a leak;
// for caller-owned buffers it is required -- ownership needs confirming.
int TCP_Transport::send_data() {
    const bool nothing_staged = (this->mr == NULL) || (this->mr_size == 0);
    if (nothing_staged) {
        return -1;
    }
    const int result = this->send_data(this->mr, this->mr_size);
    this->mr_size = 0;
    this->mr = NULL;
    return result;
}
// Receives exactly `size` bytes into `buf`, looping over short reads.
//
// Returns 0 when all bytes arrived OR when the peer closed the connection
// early (the two cases are not distinguishable from the return value, as
// before), and -1 on a recv() error or a negative `size`.
int TCP_Transport::recv_data(char *buf, int size) {
    if (size < 0) {
        // A negative size used to wrap to a huge unsigned byte count.
        return -1;
    }
    size_t remaining = (size_t)size;
    size_t offset = 0;
    while (remaining > 0) {
        // ssize_t keeps the -1 error value; the result used to be stored in a
        // size_t and compared with -1 through an implicit conversion.
        const ssize_t got = recv(this->config->conn_fd, (void *)(buf + offset), remaining, 0);
        if (got == 0) {
            // peer closed the connection
            return 0;
        }
        if (got == -1) {
            //error
            return -1;
        }
        remaining -= (size_t)got;
        offset += (size_t)got;
    }
    return 0;
}
// Receives one message into a freshly allocated, zero-filled buffer of
// max_packet_size_bytes bytes and hands the buffer to the caller.
//
// null_buffer - in: must point to a NULL pointer; out: the allocated buffer,
//               which the caller releases with free() (also after a -1 return
//               from recv).
//
// Data is read in chunks of up to 1025 bytes. Reading stops when the peer
// closes the connection, when a chunk comes back shorter than 1025 bytes
// (taken as end of message), or when the buffer is full.
//
// Returns the number of bytes received; 0 when *null_buffer was not NULL;
// -1 on allocation failure or recv() error.
ssize_t TCP_Transport::recv_data(char **null_buffer) {
    //null_buffer should be NULL
    if (*null_buffer != NULL) {
        printf("Buffer passed to recv_data must be NULL\n");
        return 0;
    }

    const size_t capacity = max_packet_size_bytes;
    *null_buffer = (char *)malloc(capacity);
    if (*null_buffer == NULL) {
        perror("recv_data: unable to allocate receive buffer");
        return -1;
    }
    memset(*null_buffer, 0, capacity);

    const size_t chunk = 1025;
    size_t received = 0;
    while (received < capacity) {
        // Never ask for more than the buffer can still hold; the original
        // always requested 1025 bytes and could write past the allocation.
        size_t want = capacity - received;
        if (want > chunk) {
            want = chunk;
        }
        const ssize_t got = recv(this->config->conn_fd, (void *)(*null_buffer + received), want, 0);
        if (got == -1) {
            //error
            return -1;
        }
        received += (size_t)got;
        // Peer closed, or a short chunk marks the end of the message.
        if (got == 0 || (size_t)got < chunk) {
            break;
        }
    }
    return (ssize_t)received;
}
// Sends send_size bytes from send_buf and then receives recv_size bytes into
// recv_buf. The receive is attempted only when the send returned 0.
// Returns 0 when both calls returned 0, otherwise -1.
int TCP_Transport::send_and_recv(char *send_buf, int send_size, char *recv_buf, int recv_size) {
    const bool both_ok = (this->send_data(send_buf, send_size) == 0)
                      && (this->recv_data(recv_buf, recv_size) == 0);
    return both_ok ? 0 : -1;
}
void
TCP_Transport
::
set_local_fd
(
int
fd
)
{
this
->
config
->
local_sock_fd
=
fd
;
}
void
TCP_Transport
::
set_conn_fd
(
int
fd
)
{
this
->
config
->
conn_fd
=
fd
;
}
int
TCP_Transport
::
get_local_fd
()
{
return
this
->
config
->
local_sock_fd
;
}
int
TCP_Transport
::
get_conn_fd
()
{
return
this
->
config
->
conn_fd
;
}
int
TCP_Transport
::
close_conn
()
{
if
(
this
->
config
->
conn_fd
!=-
1
)
close
(
this
->
config
->
conn_fd
);
if
(
this
->
config
->
local_sock_fd
!=-
1
)
close
(
this
->
config
->
local_sock_fd
);
return
0
;
}
//debug functions
// Exposes the internal RDMA configuration. The object stays owned by this
// transport (it is deleted in the destructor).
RDMA_config *RDMA_Transport::get_config() {
    RDMA_config *cfg = this->rdma_config;
    return cfg;
}
void
RDMA_Transport
::
fill_default_parameters
()
{
this
->
rdma_config
->
dev
.
ctx
=
NULL
;
this
->
rdma_config
->
pd
=
NULL
;
this
->
rdma_config
->
_cq
->
cq
=
NULL
;
this
->
rdma_config
->
_qp
->
qp
=
NULL
;
this
->
rdma_config
->
mr
.
mr
=
NULL
;
this
->
rdma_config
->
mr
.
mr_buf_addr
=
NULL
;
this
->
rdma_config
->
mr
.
mr_size
=
((
1
<<
9
)
+
20
);
this
->
rdma_config
->
mtu
=
IBV_MTU_512
;
this
->
rdma_config
->
min_rnr_timer
=
12
;
this
->
rdma_config
->
timeout
=
12
;
this
->
rdma_config
->
retry_cnt
=
4
;
this
->
rdma_config
->
ib_port
=
1
;
this
->
rdma_config
->
gid_idx
=
1
;
}
//required functions
// Default constructor: owns a default TCP_Transport and a fresh RDMA_config
// filled with the default parameters.
RDMA_Transport::RDMA_Transport() {
    tcp_transport = new TCP_Transport();
    rdma_config = new RDMA_config();
    // Needs rdma_config to exist already.
    fill_default_parameters();
}
// Constructs a transport whose companion TCP_Transport is built from (a, b),
// plus a fresh RDMA_config filled with the default parameters.
RDMA_Transport::RDMA_Transport(std::string a, int b) {
    tcp_transport = new TCP_Transport(a, b);
    rdma_config = new RDMA_config();
    // Needs rdma_config to exist already.
    fill_default_parameters();
}
// Wraps an existing TCP connection. The pointer is adopted: the destructor
// deletes it, so the caller must not delete tcp_conn afterwards.
RDMA_Transport::RDMA_Transport(TCP_Transport *tcp_conn) {
    tcp_transport = tcp_conn;
    rdma_config = new RDMA_config();
    // Needs rdma_config to exist already.
    fill_default_parameters();
}
// Deletes the companion TCP transport and the RDMA configuration object.
//
// NOTE(review): no ibv_* teardown call is made here; whether the verbs
// resources are released by RDMA_config's destructor is not visible in this
// file -- confirm.
RDMA_Transport::~RDMA_Transport() {
    delete tcp_transport;
    tcp_transport = NULL;

    delete rdma_config;
    rdma_config = NULL;
}
void
RDMA_Transport
::
set_conn_ip
(
std
::
string
ip
)
{
this
->
tcp_transport
->
set_ip
(
ip
);
}
void
RDMA_Transport
::
set_tcp_port
(
int
port
)
{
this
->
tcp_transport
->
set_port
(
port
);
}
// Opens the RDMA device selected by rdma_config->dev.dev_num and stores its
// context in rdma_config->dev.ctx.
//
// Returns 0 on success; -1 when the device list cannot be obtained, dev_num
// is outside the list, or the device cannot be opened.
int RDMA_Transport::rdma_open_dev() {
    int num_devs = 0;
    struct ibv_device **dev_list = ibv_get_device_list(&num_devs);
    if (dev_list == NULL) {
        //error
        return -1;
    }

    int rc = -1;
    const int wanted = this->rdma_config->dev.dev_num;
    // The index used to be applied unchecked, reading past the list when
    // fewer devices are present.
    if (wanted >= 0 && wanted < num_devs) {
        this->rdma_config->dev.ctx = ibv_open_device(dev_list[wanted]);
        if (this->rdma_config->dev.ctx != NULL) {
            rc = 0;
        }
    }

    // Release the list on every path; it used to leak when the open failed.
    ibv_free_device_list(dev_list);
    return rc;
}
int
RDMA_Transport
::
rdma_allocate_pd
()
{
if
(
this
->
rdma_config
->
dev
.
ctx
==
NULL
)
{
//error
return
-
1
;
}
this
->
rdma_config
->
pd
=
ibv_alloc_pd
(
this
->
rdma_config
->
dev
.
ctx
);
if
(
this
->
rdma_config
->
pd
==
NULL
)
{
//error
return
-
1
;
}
return
0
;
}
void
RDMA_Transport
::
set_mr_size
(
size_t
size
)
{
this
->
rdma_config
->
mr
.
mr_size
=
size
;
}
int
RDMA_Transport
::
rdma_register_mr
()
{
if
(
this
->
rdma_config
->
pd
==
NULL
)
{
//error
return
-
1
;
}
this
->
rdma_config
->
mr
.
mr_buf_addr
=
(
char
*
)
malloc
(
this
->
rdma_config
->
mr
.
mr_size
);
memset
(
this
->
rdma_config
->
mr
.
mr_buf_addr
,
0
,
this
->
rdma_config
->
mr
.
mr_size
);
this
->
rdma_config
->
mr
.
mr
=
ibv_reg_mr
(
this
->
rdma_config
->
pd
,
(
void
*
)
this
->
rdma_config
->
mr
.
mr_buf_addr
,
this
->
rdma_config
->
mr
.
mr_size
,
this
->
rdma_config
->
mr
.
mr_flags
);
if
(
this
->
rdma_config
->
mr
.
mr
==
NULL
)
{
//error
return
-
1
;
}
return
0
;
}
int
RDMA_Transport
::
rdma_init_cq
()
{
if
(
this
->
rdma_config
->
dev
.
ctx
==
NULL
)
{
//error
return
-
1
;
}
if
(
ibv_query_device
(
this
->
rdma_config
->
dev
.
ctx
,
this
->
rdma_config
->
dev
.
dev_attr
))
{
//error
return
-
1
;
}
this
->
rdma_config
->
_cq
->
cq
=
ibv_create_cq
(
this
->
rdma_config
->
dev
.
ctx
,
this
->
rdma_config
->
_cq
->
depth
,
this
->
rdma_config
->
_cq
->
cq_context
,
this
->
rdma_config
->
_cq
->
channel
,
this
->
rdma_config
->
_cq
->
comp_vector
);
if
(
this
->
rdma_config
->
_cq
->
cq
==
NULL
)
{
//error
perror
(
"Unable to create CQ"
);
return
-
1
;
}
this
->
rdma_config
->
_qp
->
qp_init_attr
.
send_cq
=
this
->
rdma_config
->
_cq
->
cq
;
this
->
rdma_config
->
_qp
->
qp_init_attr
.
recv_cq
=
this
->
rdma_config
->
_cq
->
cq
;
return
0
;
}
int
RDMA_Transport
::
rdma_init_qp
()
{
if
(
this
->
rdma_config
->
pd
==
NULL
)
{
//error
perror
(
"PD not created"
);
return
-
1
;
}
if
(
this
->
rdma_config
->
_cq
->
cq
==
NULL
)
{
//error
perror
(
"CQ not created"
);
return
-
1
;
}
struct
qp_config
*
config
=
this
->
rdma_config
->
_qp
;
// struct ibv_qp_init_attr qp_init_attr;
// memset(&qp_init_attr, 0, sizeof(qp_init_attr));
config
->
qp_init_attr
.
sq_sig_all
=
1
;
config
->
qp_init_attr
.
send_cq
=
this
->
rdma_config
->
_cq
->
cq
;
config
->
qp_init_attr
.
recv_cq
=
this
->
rdma_config
->
_cq
->
cq
;
config
->
qp_init_attr
.
qp_type
=
IBV_QPT_RC
;
//default for now
config
->
qp_init_attr
.
cap
=
{
.
max_send_wr
=
3
,
.
max_recv_wr
=
3
,
.
max_send_sge
=
1
,
.
max_recv_sge
=
1
};
this
->
rdma_config
->
_qp
->
qp
=
ibv_create_qp
(
this
->
rdma_config
->
pd
,
&
config
->
qp_init_attr
);
if
(
this
->
rdma_config
->
_qp
->
qp
==
NULL
)
{
//error
perror
(
"Unable to create qp"
);
return
-
1
;
}
return
0
;
}
int
RDMA_Transport
::
rdma_query_port
()
{
if
(
this
->
rdma_config
->
dev
.
ctx
==
NULL
)
{
//error
perror
(
"rdma_query_port: Device context not set"
);
return
-
1
;
}
if
(
this
->
rdma_config
->
ib_port
<
0
)
{
//error
perror
(
"rdma_query_port: IB PORT not set"
);
return
-
1
;
}
if
(
this
->
rdma_config
->
gid_idx
<
0
)
{
//error
perror
(
"rdma_query_port: GID INDEX not set"
);
return
-
1
;
}
if
(
this
->
rdma_config
->
dev
.
port_attr
==
NULL
)
{
this
->
rdma_config
->
dev
.
port_attr
=
new
ibv_port_attr
();
}
if
(
ibv_query_port
(
this
->
rdma_config
->
dev
.
ctx
,
this
->
rdma_config
->
ib_port
,
this
->
rdma_config
->
dev
.
port_attr
))
{
//error
perror
(
"rdma_query_port: Unable to query port"
);
return
-
1
;
}
return
0
;
}
// Reports whether the configured port is in the IBV_PORT_ACTIVE state.
//
// Returns false when the port cannot be queried. The query result used to be
// ignored, so an early failure (for example no device context) could lead to
// dereferencing a port_attr pointer that was never allocated.
bool RDMA_Transport::rdma_port_isactive() {
    if (this->rdma_query_port() != 0) {
        return false;
    }
    return this->rdma_config->dev.port_attr->state == IBV_PORT_ACTIVE;
}
// Re-registers the memory region and then moves the queue pair to the RESET
// state. Returns 0 on success, -1 when either verbs call fails.
//
// NOTE(review): mr_flags is passed to ibv_rereg_mr both as its second
// argument and as its last argument; these look like they should be
// different kinds of flag -- confirm against the ibv_rereg_mr documentation.
// NOTE(review): IBV_QP_CAP is included in the modify mask; confirm that it is
// accepted for a transition to RESET.
int RDMA_Transport::qp_state_to_reset() {
    int flags = this->rdma_config->mr.mr_flags;
    if(ibv_rereg_mr(this->rdma_config->mr.mr, flags, this->rdma_config->pd, this->rdma_config->mr.mr_buf_addr, this->rdma_config->mr.mr_size, flags)) {
        //error
        return -1;
    }
    // flags is reused below as the ibv_modify_qp attribute mask.
    flags = 0;
    struct ibv_qp_attr reset_attr;
    memset(&reset_attr, 0, sizeof(reset_attr));
    reset_attr.qp_state = IBV_QPS_RESET;
    reset_attr.cap = {
        .max_send_wr = 2,
        .max_recv_wr = 2,
        .max_send_sge = 1,
        .max_recv_sge = 1
    };
    flags = IBV_QP_STATE | IBV_QP_CAP;
    if(ibv_modify_qp(this->rdma_config->_qp->qp, &reset_attr, flags)) {
        //error
        return -1;
    }
    return 0;
}
int
RDMA_Transport
::
qp_state_to_init
()
{
RDMA_config
*
config
=
this
->
rdma_config
;
struct
ibv_qp_attr
init_attr
;
memset
(
&
init_attr
,
0
,
sizeof
(
init_attr
));
init_attr
.
qp_state
=
IBV_QPS_INIT
;
init_attr
.
pkey_index
=
0
;
//need to read more
init_attr
.
port_num
=
config
->
ib_port
;
init_attr
.
qp_access_flags
=
config
->
_qp
->
qp_access_flags
;
int
flags
=
IBV_QP_STATE
|
IBV_QP_PKEY_INDEX
|
IBV_QP_PORT
|
IBV_QP_ACCESS_FLAGS
;
if
(
ibv_modify_qp
(
config
->
_qp
->
qp
,
&
init_attr
,
flags
))
{
//error
perror
(
"qp_state_to_init: Unable to modify qp to init"
);
return
-
1
;
}
return
0
;
}
int
RDMA_Transport
::
qp_state_to_rtr
()
{
RDMA_config
*
config
=
this
->
rdma_config
;
struct
ibv_qp_attr
rtr_attr
;
memset
(
&
rtr_attr
,
0
,
sizeof
(
rtr_attr
));
rtr_attr
.
qp_state
=
IBV_QPS_RTR
;
rtr_attr
.
path_mtu
=
config
->
mtu
;
rtr_attr
.
dest_qp_num
=
config
->
remote_conn
.
qp_num
;
rtr_attr
.
rq_psn
=
0
;
//need to read more
rtr_attr
.
max_dest_rd_atomic
=
1
;
//need to read more
rtr_attr
.
min_rnr_timer
=
config
->
min_rnr_timer
;
rtr_attr
.
ah_attr
.
is_global
=
0
;
//need to read more
rtr_attr
.
ah_attr
.
dlid
=
config
->
remote_conn
.
lid
;
rtr_attr
.
ah_attr
.
sl
=
0
;
//need to read more
rtr_attr
.
ah_attr
.
src_path_bits
=
0
;
//need to read more
rtr_attr
.
ah_attr
.
port_num
=
config
->
ib_port
;
if
(
config
->
gid_idx
>=
0
)
{
rtr_attr
.
ah_attr
.
is_global
=
1
;
//need to read more
memcpy
(
&
rtr_attr
.
ah_attr
.
grh
.
dgid
,
config
->
remote_conn
.
gid
,
16
);
rtr_attr
.
ah_attr
.
grh
.
flow_label
=
0
;
//need to read more
rtr_attr
.
ah_attr
.
grh
.
hop_limit
=
1
;
//need to read more
rtr_attr
.
ah_attr
.
grh
.
sgid_index
=
config
->
gid_idx
;
rtr_attr
.
ah_attr
.
grh
.
traffic_class
=
0
;
//need to read more
}
int
flags
=
IBV_QP_STATE
|
IBV_QP_AV
|
IBV_QP_PATH_MTU
|
IBV_QP_DEST_QPN
|
IBV_QP_RQ_PSN
|
IBV_QP_MAX_DEST_RD_ATOMIC
|
IBV_QP_MIN_RNR_TIMER
;
if
(
ibv_modify_qp
(
config
->
_qp
->
qp
,
&
rtr_attr
,
flags
))
{
//error
perror
(
"qp_state_to_rtr: Unable to modify qp to rtr"
);
return
-
1
;
}
return
0
;
}
// Moves the queue pair to the RTS (ready-to-send) state using the configured
// timeout and retry count. Returns 0 on success, -1 when ibv_modify_qp fails.
int RDMA_Transport::qp_state_to_rts() {
    RDMA_config *config = this->rdma_config;

    struct ibv_qp_attr rts_attr;
    memset(&rts_attr, 0, sizeof(rts_attr));
    rts_attr.qp_state = IBV_QPS_RTS;
    rts_attr.timeout = config->timeout;
    rts_attr.retry_cnt = config->retry_cnt;
    rts_attr.rnr_retry = 0; //need to read more
    rts_attr.sq_psn = 0;    //need to read more
    // IBV_QP_MAX_QP_RD_ATOMIC in the mask below selects max_rd_atomic (the
    // number of outstanding RDMA reads/atomics this side may initiate). The
    // original set max_dest_rd_atomic instead, which this mask does not read,
    // so max_rd_atomic was submitted as 0 even though one_sided_read() issues
    // RDMA reads on this queue pair.
    rts_attr.max_rd_atomic = 1;

    int flags = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY
              | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC;
    if (ibv_modify_qp(config->_qp->qp, &rts_attr, flags)) {
        //error
        perror("qp_state_to_rts: Unable to modify qp to rts");
        return -1;
    }
    return 0;
}
// Placeholder for the transition to the SQD state: performs no verbs call
// and always reports success (0).
int RDMA_Transport::qp_state_to_sqd() {
    // Not implemented yet.
    return 0;
}
// Placeholder for the transition to the ERROR state: performs no verbs call
// and always reports success (0).
int RDMA_Transport::qp_state_to_error() {
    // Not implemented yet.
    return 0;
}
// Walks the queue pair through INIT -> RTR -> RTS, stopping at the first
// transition that fails. Returns 0 when all three succeed, otherwise -1.
int RDMA_Transport::qp_from_reset_to_rts() {
    if (this->qp_state_to_init() != 0) {
        perror("qp_from_reset_to_rts: Unable to move qp to init");
        return -1;
    }
    if (this->qp_state_to_rtr() != 0) {
        perror("qp_from_reset_to_rts: Unable to move qp to rtr");
        return -1;
    }
    if (this->qp_state_to_rts() != 0) {
        perror("qp_from_reset_to_rts: Unable to move qp to rts");
        return -1;
    }
    return 0;
}
// Queries the queue pair for the attributes selected by `mask`; the results
// land in the stored qp_attr and qp_init_attr.
//
// Returns 0 on success, otherwise the non-zero value from ibv_query_qp. The
// success path previously fell off the end of this non-void function, which
// is undefined behaviour.
int RDMA_Transport::rdma_query_qp(enum ibv_qp_attr_mask mask) {
    qp_config *config = this->rdma_config->_qp;
    const int t = ibv_query_qp(config->qp, &config->qp_attr, mask, &config->qp_init_attr);
    if (t) {
        //error
        perror("rdma_query_qp: Unable to query qp");
        return t;
    }
    return 0;
}
enum
ibv_qp_state
RDMA_Transport
::
get_curr_qp_state
()
{
this
->
rdma_query_qp
(
IBV_QP_STATE
);
return
(
this
->
rdma_config
->
_qp
->
qp_attr
.
cur_qp_state
);
}
int
RDMA_Transport
::
one_sided_read
()
{
auto
start_time
=
chrono
::
steady_clock
::
now
();
auto
end_time
=
chrono
::
steady_clock
::
now
();
if
(
analyze
)
{
start_time
=
chrono
::
steady_clock
::
now
();
}
//IBV_WR_RDMA_READ
struct
ibv_send_wr
sr
;
struct
ibv_sge
sge
;
struct
ibv_send_wr
*
bad_wr
;
memset
(
&
sge
,
0
,
sizeof
(
sge
));
sge
.
addr
=
(
uintptr_t
)
this
->
rdma_config
->
mr
.
mr_buf_addr
;
sge
.
length
=
this
->
rdma_config
->
mr
.
mr_size
;
sge
.
lkey
=
this
->
rdma_config
->
mr
.
mr
->
lkey
;
memset
(
&
sr
,
0
,
sizeof
(
sr
));
sr
.
next
=
NULL
;
sr
.
wr_id
=
0
;
//need to be dynamic
sr
.
sg_list
=
&
sge
;
sr
.
num_sge
=
1
;
//need to read more
sr
.
opcode
=
IBV_WR_RDMA_READ
;
sr
.
send_flags
=
IBV_SEND_SIGNALED
;
//placeholder to send wr completion events
// should be dynamic
sr
.
wr
.
rdma
.
remote_addr
=
this
->
rdma_config
->
remote_conn
.
addr
;
sr
.
wr
.
rdma
.
rkey
=
this
->
rdma_config
->
remote_conn
.
rkey
;
if
(
ibv_post_send
(
this
->
rdma_config
->
_qp
->
qp
,
&
sr
,
&
bad_wr
))
{
//error
return
-
1
;
}
int
poll_res
;
poll_res
=
this
->
poll_cq
();
if
(
poll_res
==
0
)
{
perror
(
"CQ err: empty CQ
\n
"
);
return
-
1
;
}
if
(
poll_res
==
-
1
)
{
perror
(
"CQ err: bad status
\n
"
);
return
-
1
;
}
if
(
analyze
)
{
end_time
=
chrono
::
steady_clock
::
now
();
rdma_one_sided_read_time
.
push_back
((
end_time
-
start_time
));
}
return
0
;
}
int
RDMA_Transport
::
one_sided_write
()
{
auto
start_time
=
chrono
::
steady_clock
::
now
();
auto
end_time
=
chrono
::
steady_clock
::
now
();
if
(
analyze
)
{
start_time
=
chrono
::
steady_clock
::
now
();
}
//IBV_WR_RDMA_WRITE
struct
ibv_send_wr
sr
;
struct
ibv_sge
sge
;
struct
ibv_send_wr
*
bad_wr
;
memset
(
&
sge
,
0
,
sizeof
(
sge
));
sge
.
addr
=
(
uintptr_t
)
this
->
rdma_config
->
mr
.
mr_buf_addr
;
sge
.
length
=
this
->
rdma_config
->
mr
.
mr_size
;
sge
.
lkey
=
this
->
rdma_config
->
mr
.
mr
->
lkey
;
memset
(
&
sr
,
0
,
sizeof
(
sr
));
sr
.
next
=
NULL
;
sr
.
wr_id
=
0
;
//need to be dynamic
sr
.
sg_list
=
&
sge
;
sr
.
num_sge
=
1
;
//need to read more
sr
.
opcode
=
IBV_WR_RDMA_WRITE
;
sr
.
send_flags
=
IBV_SEND_SIGNALED
;
//placeholder to send wr completion events
// should be dynamic
sr
.
wr
.
rdma
.
remote_addr
=
this
->
rdma_config
->
remote_conn
.
addr
;
sr
.
wr
.
rdma
.
rkey
=
this
->
rdma_config
->
remote_conn
.
rkey
;
if
(
ibv_post_send
(
this
->
rdma_config
->
_qp
->
qp
,
&
sr
,
&
bad_wr
))
{
//error
return
-
1
;
}
int
poll_res
;
poll_res
=
this
->
poll_cq
();
if
(
poll_res
==
0
)
{
perror
(
"CQ err: empty CQ
\n
"
);
return
-
1
;
}
if
(
poll_res
==
-
1
)
{
perror
(
"CQ err: bad status
\n
"
);
return
-
1
;
}
if
(
analyze
)
{
end_time
=
chrono
::
steady_clock
::
now
();
rdma_one_sided_write_time
.
push_back
((
end_time
-
start_time
));
}
return
0
;
}
int
RDMA_Transport
::
two_sided_send
()
{
//IBV_WR_SEND
struct
ibv_send_wr
sr
;
struct
ibv_sge
sge
;
struct
ibv_send_wr
*
bad_wr
;
memset
(
&
sge
,
0
,
sizeof
(
sge
));
sge
.
addr
=
(
uintptr_t
)
this
->
rdma_config
->
mr
.
mr_buf_addr
;
sge
.
length
=
this
->
rdma_config
->
mr
.
mr_size
;
sge
.
lkey
=
this
->
rdma_config
->
mr
.
mr
->
lkey
;
memset
(
&
sr
,
0
,
sizeof
(
sr
));
sr
.
next
=
NULL
;
sr
.
wr_id
=
0
;
//need to be dynamic
sr
.
sg_list
=
&
sge
;
sr
.
num_sge
=
1
;
//need to read more
sr
.
opcode
=
IBV_WR_SEND
;
sr
.
send_flags
=
IBV_SEND_SIGNALED
;
//placeholder to send wr completion events
// should be dynamic
if
(
ibv_post_send
(
this
->
rdma_config
->
_qp
->
qp
,
&
sr
,
&
bad_wr
))
{
//error
return
-
1
;
}
return
0
;
}
int
RDMA_Transport
::
two_sided_recv
()
{
//RR
struct
ibv_recv_wr
rr
;
struct
ibv_sge
sge
;
struct
ibv_recv_wr
*
bad_wr
;
memset
(
&
sge
,
0
,
sizeof
(
sge
));
sge
.
addr
=
(
uintptr_t
)
this
->
rdma_config
->
mr
.
mr_buf_addr
;
sge
.
length
=
this
->
rdma_config
->
mr
.
mr_size
;
sge
.
lkey
=
this
->
rdma_config
->
mr
.
mr
->
lkey
;
memset
(
&
rr
,
0
,
sizeof
(
rr
));
rr
.
next
=
NULL
;
rr
.
wr_id
=
0
;
//need to ebe dynamic
rr
.
sg_list
=
&
sge
;
rr
.
num_sge
=
1
;
//need to read more
if
(
ibv_post_recv
(
this
->
rdma_config
->
_qp
->
qp
,
&
rr
,
&
bad_wr
))
{
//error
return
-
1
;
}
return
0
;
}
int
RDMA_Transport
::
poll_cq
()
{
RDMA_config
*
config
=
this
->
rdma_config
;
struct
ibv_wc
wc
;
int
poll_result
=
-
1
;
auto
start
=
chrono
::
steady_clock
::
now
();
auto
end
=
chrono
::
steady_clock
::
now
();
chrono
::
duration
<
double
>
elapsed_time
;
do
{
poll_result
=
ibv_poll_cq
(
config
->
_cq
->
cq
,
1
,
&
wc
);
//need to think about the num_entries argument
end
=
chrono
::
steady_clock
::
now
();
elapsed_time
=
(
end
-
start
);
}
while
(
poll_result
==
0
&&
(
elapsed_time
)
<
max_cq_poll_timeout
);
if
(
poll_result
<
0
)
{
//error
return
-
1
;
}
else
if
(
poll_result
==
0
)
{
//empty cq
//some distinction needed between error and empty cq
return
0
;
}
else
{
if
(
wc
.
status
!=
IBV_WC_SUCCESS
)
{
printf
(
"Bad status while polling CQ: %d err
\n
"
,
wc
.
status
);
return
-
1
;
}
}
return
1
;
}
// Smoke-tests the two-sided (send/recv) path. WARNING: erases the MR data.
//
// The side whose TCP transport has no peer IP is treated as the server: it
// writes "hello" into the memory region and posts a send. The other side
// posts a receive. Both then poll the CQ once.
//
// Returns  1 when a successful completion was reaped,
//          0 when the CQ stayed empty,
//         -1 on any error (MR missing or too small, post failure, bad
//            completion).
int RDMA_Transport::check_rdma_conn() {
    static const char dummy_data[] = "hello";
    const size_t dummy_len = sizeof(dummy_data) - 1;    // 5 bytes, no NUL

    char *mr_buf = this->rdma_config->mr.mr_buf_addr;
    const size_t mr_size = this->rdma_config->mr.mr_size;

    // Guard the memset/memcpy below against an unset or undersized region.
    if (mr_buf == NULL || mr_size < dummy_len) {
        perror("check_rdma_conn: MR not set or too small");
        return -1;
    }

    std::string ip = this->tcp_transport->get_ip();
    memset(mr_buf, 0, mr_size);

    if (ip.empty()) {
        //this is the server
        //so this machine posts send two sided verb
        memcpy(mr_buf, dummy_data, dummy_len);
        // A failed post must be reported as an error; otherwise poll_cq()
        // would just time out and this would look like an empty CQ.
        if (this->two_sided_send()) {
            perror("check_rdma_conn: Unable to post send");
            return -1;
        }
    } else {
        //this is the client
        if (this->two_sided_recv()) {
            perror("check_rdma_conn: Unable to post recv");
            return -1;
        }
    }

    //poll cq
    int t = this->poll_cq();
    if (t == -1) {
        //error
        return -1;
    }
    if (t == 0) {
        //cq empty
        return 0;
    }
    return 1;
}
int
RDMA_Transport
::
check_rdma_onesided
()
{
//warning erases mr data
//Necessary that other party also has run the same function
TCP_Transport
*
tcp_transport
=
this
->
get_tcp_conn
();
char
*
mr_addr
=
this
->
rdma_config
->
mr
.
mr_buf_addr
;
char
*
dummy_data1
=
"hello this is server"
;
//20
char
*
dummy_data2
=
"hello this is client"
;
//20
char
*
buf
=
NULL
;
printf
(
"========================== RDMA ONESIDED TRANSPORTS TEST ==========================
\n
"
);
if
(
mr_addr
==
NULL
||
this
->
rdma_config
->
mr
.
mr_size
==
0
)
{
//error
perror
(
"check_rdma_onesided: MR not set"
);
return
-
1
;
}
if
(
tcp_transport
->
get_ip
().
empty
())
{
//this is the server
//server will not initiate comm
memset
(
mr_addr
,
0
,
this
->
get_mr_size
());
tcp_transport
->
recv_data
(
&
buf
);
printf
(
"TCP Request for one-sided read received by server
\n
"
);
free
(
buf
);
buf
=
NULL
;
printf
(
"Initiating RDMA one sided read
\n
"
);
if
(
this
->
one_sided_read
())
{
//error
perror
(
"check_rdma_onesided: one sided read fail"
);
return
-
1
;
}
printf
(
"One sided read work request posted
\n
"
);
printf
(
"MR contents are: %s
\n
"
,
mr_addr
);
//now set server's mr and let client do one sided read
memset
(
mr_addr
,
0
,
this
->
get_mr_size
());
memcpy
(
mr_addr
,
dummy_data1
,
20
);
//tcp_transport->set_mr(dummy_data1, 20);
printf
(
"Sending tcp prep request to client
\n
"
);
tcp_transport
->
send_data
(
dummy_data1
,
20
);
printf
(
"Sent TCP prep request to client
\n
"
);
}
else
{
//this is the client
//client will initiate comm
memset
(
mr_addr
,
0
,
this
->
get_mr_size
());
memcpy
(
mr_addr
,
dummy_data2
,
20
);
//tcp_transport->set_mr(dummy_data2, 20);
printf
(
"Sending tcp prep request to server
\n
"
);
tcp_transport
->
send_data
(
dummy_data2
,
20
);
printf
(
"Sent TCP prep request to server
\n
"
);
//now get server's mr
tcp_transport
->
recv_data
(
&
buf
);
printf
(
"Got server's prep request
\n
"
);
free
(
buf
);
buf
=
NULL
;
printf
(
"Initiating RDMA one sided read
\n
"
);
memset
(
mr_addr
,
0
,
this
->
get_mr_size
());
if
(
this
->
one_sided_read
())
{
//error
perror
(
"check_rdma_onesided: one sided read fail"
);
return
-
1
;
}
printf
(
"One sided read request posted
\n
"
);
printf
(
"MR contents are: %s
\n
"
,
mr_addr
);
}
printf
(
"========================== END ==========================
\n
"
);
return
0
;
}
void
RDMA_Transport
::
set_mr_flags
(
int
flags
)
{
this
->
rdma_config
->
mr
.
mr_flags
=
flags
;
}
// Copies a NUL-terminated string into the start of the memory region.
//
// Only the characters are copied, not the terminating NUL, exactly as
// before. The length is now measured with strlen() instead of building a
// temporary std::string, and NULL inputs are rejected instead of being
// dereferenced.
//
// Returns 0 on success; -1 when src is NULL, the region has no buffer, or
// the string is longer than the region.
int RDMA_Transport::copy_to_mr(const char *src) {
    size_t mr_size = this->rdma_config->mr.mr_size;
    char *mr_addr = this->rdma_config->mr.mr_buf_addr;

    if (src == NULL || mr_addr == NULL) {
        //error
        return -1;
    }

    const size_t src_len = strlen(src);
    if (src_len > mr_size) {
        //error
        return -1;
    }
    memcpy(mr_addr, src, src_len);
    return 0;
}
int
RDMA_Transport
::
copy_to_mr
(
char
*
buf
,
size_t
buf_size
)
{
size_t
mr_size
=
this
->
rdma_config
->
mr
.
mr_size
;
char
*
mr_addr
=
this
->
rdma_config
->
mr
.
mr_buf_addr
;
if
(
buf_size
>
mr_size
)
{
//error
return
-
1
;
}
memcpy
(
mr_addr
,
buf
,
buf_size
);
return
0
;
}
char
*
RDMA_Transport
::
get_mr_addr
()
{
return
this
->
rdma_config
->
mr
.
mr_buf_addr
;
}
size_t
RDMA_Transport
::
get_mr_size
()
{
return
this
->
rdma_config
->
mr
.
mr_size
;
}
// Returns the TCP transport this RDMA transport holds (may be NULL; the
// pointer is handed out as-is).
TCP_Transport *RDMA_Transport::get_tcp_conn() {
    TCP_Transport *side_channel = this->tcp_transport;
    return side_channel;
}
int
RDMA_Transport
::
rdma_local_setup
()
{
//sets up stuff for rdma(except the connection part)
if
(
this
->
rdma_open_dev
())
{
//error
perror
(
"rdma_local_setup: Unable to open dev"
);
return
-
1
;
}
if
(
this
->
rdma_allocate_pd
())
{
//error
perror
(
"rdma_local_setup: Unable to allocate pd"
);
return
-
1
;
}
if
(
this
->
rdma_register_mr
())
{
//error
perror
(
"rdma_local_setup: Unable to register mr"
);
return
-
1
;
}
if
(
this
->
rdma_init_cq
())
{
//error
perror
(
"rdma_local_setup: Unable to init cq"
);
return
-
1
;
}
if
(
this
->
rdma_init_qp
())
{
//error
perror
(
"rdma_local_setup: Unable to init qp"
);
return
-
1
;
}
if
(
ibv_query_gid
(
this
->
rdma_config
->
dev
.
ctx
,
this
->
rdma_config
->
ib_port
,
this
->
rdma_config
->
gid_idx
,
this
->
rdma_config
->
gid_struct
))
{
perror
(
"rdma_local_setup: unable to query gid"
);
return
-
1
;
}
memcpy
(
this
->
rdma_config
->
local_conn
.
gid
,
this
->
rdma_config
->
gid_struct
,
16
);
return
0
;
}
int
RDMA_Transport
::
rdma_sync_config
()
{
RDMA_config
*
rdma_config
=
this
->
rdma_config
;
TCP_Transport
*
tcp_conn
=
this
->
tcp_transport
;
int
t
;
rdma_config
->
local_conn
.
addr
=
htonll
((
uintptr_t
)
rdma_config
->
mr
.
mr_buf_addr
);
rdma_config
->
local_conn
.
rkey
=
htonl
(
rdma_config
->
mr
.
mr
->
rkey
);
rdma_config
->
local_conn
.
qp_num
=
htonl
(
rdma_config
->
_qp
->
qp
->
qp_num
);
this
->
rdma_query_port
();
rdma_config
->
local_conn
.
lid
=
htons
(
rdma_config
->
dev
.
port_attr
->
lid
);
t
=
tcp_conn
->
send_and_recv
((
char
*
)
&
(
rdma_config
->
local_conn
),
sizeof
(
rdma_config
->
local_conn
),
(
char
*
)
&
(
rdma_config
->
remote_conn
),
sizeof
(
rdma_config
->
remote_conn
));
if
(
t
<
0
)
{
//error
perror
(
"rdma_sync_config: Unable to sync rdma info"
);
return
-
1
;
}
rdma_config
->
remote_conn
.
addr
=
ntohll
(
rdma_config
->
remote_conn
.
addr
);
rdma_config
->
remote_conn
.
rkey
=
ntohl
(
rdma_config
->
remote_conn
.
rkey
);
rdma_config
->
remote_conn
.
qp_num
=
ntohl
(
rdma_config
->
remote_conn
.
qp_num
);
rdma_config
->
remote_conn
.
lid
=
ntohs
(
rdma_config
->
remote_conn
.
lid
);
return
0
;
}
int
RDMA_Transport
::
get_local_fd
()
{
return
this
->
tcp_transport
->
get_local_fd
();
}
int
RDMA_Transport
::
get_conn_fd
()
{
return
this
->
tcp_transport
->
get_conn_fd
();
}
// Sets up everything, including the connection:
//   1. local RDMA resources (rdma_local_setup),
//   2. the TCP side channel (TCP_Transport::setup),
//   3. exchange of connection parameters (rdma_sync_config),
//   4. QP state transitions up to RTS (qp_from_reset_to_rts).
//
// Any negative TCP port is treated as "not set", matching
// rdma_accept_conn_setup(). The old inline copies of steps 3 and 4 that were
// left commented out have been removed; those steps live in the helpers
// called below.
//
// Returns 0 on success, -1 at the first step that fails.
int RDMA_Transport::rdma_setup() {
    TCP_Transport *tcp_conn = this->tcp_transport;
    int t;

    if (this->rdma_local_setup()) {
        //error
        perror("rdma_setup: Local setup failed");
        return -1;
    }
    if (tcp_conn->get_port() < 0) {
        //error
        perror("rdma_setup: TCP port not set");
        return -1;
    }
    if (tcp_conn->setup()) {
        //error
        perror("rdma_setup: Unable to setup TCP");
        return -1;
    }

    t = this->rdma_sync_config();
    if (t < 0) {
        //error
        perror("rdma_setup: rdma_sync_config issue");
        return -1;
    }

    //state changes
    if (this->qp_from_reset_to_rts()) {
        // error
        perror("rdma_setup: Error in moving qp states");
        return -1;
    }
    return 0;
}
// Same sequence as rdma_setup(), except that the TCP side channel is
// obtained by accepting a connection (TCP_Transport::accept_conn) instead of
// calling TCP_Transport::setup().
//
// The first three error messages used to carry the "rdma_setup:" prefix,
// copied from the sibling function; they now name this function so the log
// points at the real failure site.
//
// Returns 0 on success, -1 at the first step that fails.
int RDMA_Transport::rdma_accept_conn_setup() {
    TCP_Transport *tcp_conn = this->tcp_transport;
    int t;

    if (this->rdma_local_setup()) {
        //error
        perror("rdma_accept_conn_setup: Local setup failed");
        return -1;
    }
    if (tcp_conn->get_port() < 0) {
        // error
        perror("rdma_accept_conn_setup: TCP port not set");
        return -1;
    }
    if (tcp_conn->accept_conn()) {
        // error
        perror("rdma_accept_conn_setup: TCP accept conn failed");
        return -1;
    }

    t = this->rdma_sync_config();
    if (t < 0) {
        //error
        perror("rdma_accept_conn_setup: rdma_sync_config issue");
        return -1;
    }

    //state changes
    if (this->qp_from_reset_to_rts()) {
        // error
        perror("rdma_accept_conn_setup: Error in moving qp states");
        return -1;
    }
    return 0;
}
// RDMA setup for a transport whose TCP side channel was connected by the
// caller: local resources, parameter exchange and QP transitions only.
//
// A missing TCP transport is now a hard error. It used to be reported and
// then ignored, after which rdma_sync_config() dereferenced the NULL
// pointer.
//
// Returns 0 on success, -1 at the first step that fails.
int RDMA_Transport::rdma_setup_no_tcp_setup() {
    if (this->tcp_transport == NULL) {
        //error
        perror("rdma_setup_no_tcp");
        return -1;
    }

    int t;

    if (this->rdma_local_setup()) {
        //error
        perror("rdma_setup_no_tcp_setup: local setup fail");
        return -1;
    }

    t = this->rdma_sync_config();
    if (t < 0) {
        //error
        perror("rdma_setup_no_tcp_setup: sync config fail");
        return -1;
    }

    //state changes
    if (this->qp_from_reset_to_rts()) {
        // error
        perror("rdma_setup_no_tcp_setup: state changes fail");
        return -1;
    }
    return 0;
}
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/transport_api/transport_config.hpp
0 → 100644
View file @
d2d47b86
#ifndef __TRANSPORT_CONFIG_H__
#define __TRANSPORT_CONFIG_H__
#include <unistd.h>
#include <string>
#include <infiniband/verbs.h>
// Identifiers for the transport kinds this layer names.
// Values are spelled out; they equal the implicit numbering.
enum Transport_Type {
    TCP_IP_TRANSPORT = 0,
    UDP_TRANSPORT = 1,
    RDMA_RC_TRANSPORT = 2,
    RDMA_UC_TRANSPORT = 3
};
// Endpoint description and socket descriptors for one TCP connection.
//
// -1 marks an unset port or descriptor. The destructor closes every
// descriptor that is set, including descriptor 0, which is a valid POSIX
// descriptor and was previously skipped.
//
// NOTE(review): the copy constructor copies the descriptors verbatim, so the
// original and the copy will both close() them on destruction. Confirm that
// callers never keep both alive, or switch to dup()/move semantics.
struct TCP_config {
    std::string ip;     // peer address; empty when none was given
    int port;           // TCP port, -1 when unset
    int conn_fd;        // connected socket, -1 when unset
    int local_sock_fd;  // local socket, -1 when unset

    TCP_config() : ip(), port(-1), conn_fd(-1), local_sock_fd(-1) {}

    // Port only; the address stays empty.
    TCP_config(int b) : ip(), port(b), conn_fd(-1), local_sock_fd(-1) {}

    // Address and port.
    TCP_config(std::string a, int b)
        : ip(a), port(b), conn_fd(-1), local_sock_fd(-1) {}

    // Takes a const reference so const objects and temporaries can be copied
    // as well; see the ownership note above.
    TCP_config(const TCP_config &t)
        : ip(t.ip), port(t.port), conn_fd(t.conn_fd),
          local_sock_fd(t.local_sock_fd) {}

    ~TCP_config() {
        this->ip.clear();
        this->port = -1;
        if (this->conn_fd >= 0) {
            close(this->conn_fd);
            this->conn_fd = -1;
        }
        if (this->local_sock_fd >= 0) {
            close(this->local_sock_fd);
            this->local_sock_fd = -1;
        }
    }
};
// Connection parameters one side publishes to its peer.
// All fields start out zeroed and are zeroed again on destruction.
struct rdma_conn {
    uint64_t addr;      // buffer address
    uint32_t rkey;      // remote key
    uint32_t qp_num;    // QP number
    uint16_t lid;       // local ID
    uint8_t gid[16];    // global ID

    rdma_conn() : addr(0), rkey(0), qp_num(0), lid(0) {
        memset((void *) &this->gid, 0, sizeof(this->gid));
    }

    ~rdma_conn() {
        memset((void *) &this->gid, 0, sizeof(this->gid));
        this->lid = 0;
        this->qp_num = 0;
        this->rkey = 0;
        this->addr = 0;
    }
};
// Device handle plus the attribute records queried from it.
//
// dev_attr and port_attr are allocated with `new` in the constructor and are
// therefore released with `delete`; they used to be passed to free(), a
// mismatched deallocation. The context is released only through
// ibv_close_device(); the extra free(ctx) that followed it was a double
// release.
//
// NOTE(review): the implicit copy operations would make two objects own the
// same pointers. Confirm this struct is never copied.
struct rdma_dev {
    struct ibv_context *ctx;            // opened device, NULL until opened
    struct ibv_device_attr *dev_attr;   // owned, allocated with new
    struct ibv_port_attr *port_attr;    // owned, allocated with new
    int dev_num;                        // device number, default 0

    rdma_dev()
        : ctx(NULL),
          dev_attr(new ibv_device_attr()),
          port_attr(new ibv_port_attr()),
          dev_num(0) //default value
    {}

    ~rdma_dev() {
        if (this->ctx) {
            //close the device; this also releases the context
            if (ibv_close_device(this->ctx)) {
                perror("Unable to close device");
            }
            this->ctx = NULL;
        }
        delete this->dev_attr;
        this->dev_attr = NULL;
        delete this->port_attr;
        this->port_attr = NULL;
    }
};
// Memory region bookkeeping: the backing buffer, its size, the verbs MR
// handle and the access flags used when registering.
//
// Destruction deregisters the MR first (only if one was registered) and then
// frees the buffer. The ibv_mr handle is released by ibv_dereg_mr(), so it is
// no longer passed to free(). ibv_dereg_mr() is also no longer called with a
// NULL handle, which used to happen whenever registration never took place.
struct rdma_mr {
    char *mr_buf_addr;  // backing buffer; released with free() -- presumably
                        // malloc'ed by the registration code, TODO confirm
    size_t mr_size;     // buffer size in bytes
    struct ibv_mr *mr;  // verbs handle, NULL until registered
    int mr_flags;       // access flags for registration

    rdma_mr()
        : mr_buf_addr(NULL),
          mr_size(0),
          mr(NULL),
          mr_flags(IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ |
                   IBV_ACCESS_LOCAL_WRITE) {}

    ~rdma_mr() {
        if (this->mr) {
            if (ibv_dereg_mr(this->mr)) {
                //error statement
                perror("Unable to deregister MR");
            }
            this->mr = NULL;
        }
        if (this->mr_buf_addr) {
            free(this->mr_buf_addr);
            this->mr_buf_addr = NULL;
        }
    }
};
struct
cq_config
{
struct
ibv_cq
*
cq
;
int
depth
;
void
*
cq_context
;
ibv_comp_channel
*
channel
;
int
comp_vector
;
cq_config
()
{
this
->
cq
=
NULL
;
this
->
depth
=
3
;
//some default value
this
->
cq_context
=
NULL
;
this
->
channel
=
NULL
;
this
->
comp_vector
=
0
;
}
~
cq_config
()
{
if
(
this
->
cq
)
{
if
(
ibv_destroy_cq
(
this
->
cq
))
{
//error
perror
(
"Unable to destroy cq"
);
}
this
->
cq
=
NULL
;
//delete(this->cq);
}
if
(
this
->
cq_context
)
{
free
(
this
->
cq_context
);
this
->
cq_context
=
NULL
;
}
if
(
this
->
channel
)
{
if
(
ibv_destroy_comp_channel
(
this
->
channel
))
{
//error
perror
(
"Unable to destroy comp_channel"
);
}
this
->
channel
=
NULL
;
}
}
};
// Queue-pair handle together with the attributes used to create and query
// it.
//
// Changes from the previous version:
//  - both constructors share one initialisation path, so qp_access_flags is
//    always set (the qp_type constructor used to leave it uninitialised);
//  - qp_attr is zeroed on construction;
//  - the capability limits are assigned field by field instead of through
//    designated initialisers, which are not standard C++ before C++20;
//  - the destructor only destroys a QP that was actually created;
//    ibv_destroy_qp() used to be called with NULL when creation never
//    happened.
struct qp_config {
    struct ibv_qp *qp;                      // NULL until created
    void *qp_ctx;                           // released with free()
    struct ibv_qp_attr qp_attr;             // filled by QP queries
    struct ibv_qp_init_attr qp_init_attr;   // creation parameters
    int qp_access_flags;

    // Defaults to a reliable-connection QP.
    qp_config() : qp_config(IBV_QPT_RC) {}

    qp_config(enum ibv_qp_type qp_type)
        : qp(NULL),
          qp_ctx(NULL),
          qp_access_flags(IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ |
                          IBV_ACCESS_LOCAL_WRITE) {
        memset(&this->qp_attr, 0, sizeof(this->qp_attr));
        memset(&this->qp_init_attr, 0, sizeof(this->qp_init_attr));
        this->qp_init_attr.qp_context = this->qp_ctx;
        // The CQs stay NULL here; presumably they are attached before the QP
        // is created -- TODO confirm in rdma_init_qp().
        this->qp_init_attr.send_cq = NULL;
        this->qp_init_attr.recv_cq = NULL;
        this->qp_init_attr.srq = NULL;
        this->qp_init_attr.qp_type = qp_type;
        this->qp_init_attr.sq_sig_all = 1;
        this->qp_init_attr.cap.max_send_wr = 2;
        this->qp_init_attr.cap.max_recv_wr = 2;
        this->qp_init_attr.cap.max_send_sge = 1;
        this->qp_init_attr.cap.max_recv_sge = 1;
        this->qp_init_attr.cap.max_inline_data = 2;
    }

    ~qp_config() {
        //check if qp in error state
        //if not, move qp to error state
        if (this->qp) {
            if (ibv_destroy_qp(this->qp)) {
                perror("Unable to destroy qp");
            }
            this->qp = NULL;
        }
        if (this->qp_ctx) {
            free(this->qp_ctx);
            this->qp_ctx = NULL;
        }
    }
};
// Aggregate of everything one RDMA endpoint needs: the local and remote
// connection records, device, memory region, protection domain, QP/CQ
// settings and the tunables used during the QP state transitions.
//
// gid_struct is allocated with `new` and is now released with `delete`; it
// used to be passed to free(), a mismatched deallocation.
//
// NOTE(review): the destructor body deallocates the PD before the `mr`
// member's own destructor runs (members are destroyed after the body), so
// ibv_dealloc_pd() may be called while the MR is still registered. Confirm
// and reorder if the "Unable to dealloc PD" message shows up at teardown.
struct RDMA_config {
    struct TCP_config;  // nested forward declaration, kept as in the original
    struct rdma_conn remote_conn;
    struct rdma_conn local_conn;
    struct rdma_dev dev;
    struct rdma_mr mr;
    struct ibv_pd *pd;
    struct qp_config *_qp;          // owned
    struct cq_config *_cq;          // owned
    union ibv_gid *gid_struct;      // owned
    enum ibv_mtu mtu;
    int min_rnr_timer;
    int timeout;
    int retry_cnt;
    int ib_port;
    int gid_idx;
    enum ibv_qp_state curr_state;

    RDMA_config() {
        //fill with some default values
        this->pd = NULL;
        this->_qp = new qp_config();
        this->_cq = new cq_config();
        this->gid_struct = new ibv_gid();
        this->mtu = IBV_MTU_512;
        this->min_rnr_timer = 12;
        this->timeout = 12;
        this->retry_cnt = 4;
        this->ib_port = 1;
        this->gid_idx = 1;
        this->curr_state = IBV_QPS_UNKNOWN;
    }

    ~RDMA_config() {
        // The QP goes first, then the CQ.
        if (this->_qp) {
            delete (this->_qp);
            this->_qp = NULL;
        }
        if (this->_cq) {
            delete (this->_cq);
            this->_cq = NULL;
        }
        if (this->pd) {
            if (ibv_dealloc_pd(this->pd)) {
                perror("Unable to dealloc PD");
            }
            this->pd = NULL;
        }
        delete this->gid_struct;
        this->gid_struct = NULL;
    }
};
// TCP transport. Only the interface is declared here; the definitions are in
// a separate source file.
class TCP_Transport {
protected:
    TCP_config *config;     // endpoint settings and socket descriptors

public:
    char *mr;               // buffer pointer set through set_mr()
    size_t mr_size;         // size that goes with `mr`

    // Construction / destruction.
    TCP_Transport();
    TCP_Transport(std::string a, int b);
    TCP_Transport(int b);
    TCP_Transport(TCP_config &t);
    ~TCP_Transport();

    // Endpoint accessors.
    std::string get_ip();
    int get_port();
    void set_ip(std::string addr);
    void set_ip(char *addr);
    void set_port(int port);

    // Buffer selection.
    void set_mr(char *buf_addr, size_t buf_size);
    void set_mr(std::string str, size_t buf_size);

    // Connection establishment.
    int make_socket();
    int start_listen();
    int make_socket_listen();
    int accept_conn();
    int make_conn();
    int setup();

    // Data transfer.
    int send_data(char *buf, int size);
    int send_data(std::string str, int size);
    int send_data();
    int recv_data(char *buf, int size);
    // Callers in this code base pass the address of a NULL pointer and
    // free() the result afterwards.
    ssize_t recv_data(char **null_buffer);
    int send_and_recv(char *send_buf, int send_size,
                      char *recv_buf, int recv_size);

    // Descriptor accessors.
    void set_local_fd(int fd);
    void set_conn_fd(int fd);
    int get_local_fd();
    int get_conn_fd();

    int close_conn();
};
// RDMA transport built on libibverbs, with a TCP transport as the side
// channel for exchanging connection parameters. Only the interface is
// declared here.
class RDMA_Transport {
protected:
    TCP_Transport *tcp_transport;   // side channel to the peer

public:
    RDMA_config *rdma_config;       // all verbs resources and tunables

    RDMA_Transport();
    RDMA_Transport(std::string a, int b);
    RDMA_Transport(TCP_Transport *tcp_conn);
    ~RDMA_Transport();

    //debug functions
    RDMA_config *get_config();
    void fill_default_parameters();

    //required functions
    int rdma_setup();               //sets up everything including the connection
    int rdma_setup_no_tcp_setup();  //same, but does not set up the TCP side
    int rdma_accept_conn_setup();   //same, but accepts the TCP connection
    int rdma_local_setup();         //sets up whatever can be achieved locally
    int rdma_sync_config();         //swaps local_conn/remote_conn with the peer

    void set_conn_ip(std::string ip);
    void set_tcp_port(int port);

    // Individual resource setup steps; a non-zero return means failure.
    int rdma_open_dev();
    int rdma_allocate_pd();
    void set_mr_size(size_t size);
    int rdma_register_mr();
    int rdma_init_cq();
    int rdma_init_qp();
    int rdma_query_port();
    bool rdma_port_isactive();
    int rdma_query_gid();

    // Memory-region helpers.
    void set_mr_flags(int flags);
    int copy_to_mr(const char *src);            //copies a C string, -1 if too long
    int copy_to_mr(char *buf, size_t buf_size); //copies buf_size bytes, -1 if too long
    char *get_mr_addr();
    size_t get_mr_size();

    // Side-channel accessors.
    TCP_Transport *get_tcp_conn();
    int get_local_fd();
    int get_conn_fd();

    // Queue-pair state transitions; 0 on success.
    int qp_state_to_reset();
    int qp_state_to_init();
    int qp_state_to_rtr();
    int qp_state_to_rts();
    int qp_state_to_sqd();          //placeholder, returns 0
    int qp_state_to_error();        //placeholder, returns 0
    int qp_from_reset_to_rts();     //init, rtr, rts in sequence

    int rdma_query_qp(enum ibv_qp_attr_mask mask);
    enum ibv_qp_state get_curr_qp_state();

    // Self tests; both overwrite the memory region.
    int check_rdma_conn();
    int check_rdma_onesided();

    // Data-path verbs.
    int one_sided_read();   //RDMA_READ
    int one_sided_write();  //RDMA_WRITE
    int two_sided_send();   //WR_SEND
    int two_sided_recv();   //RR
    int poll_cq();          //1 completion ok, 0 CQ empty, -1 error
};
#endif
\ No newline at end of file
Smit_MTP_RamCloud_Replication_Offload/transport_helper.cpp
0 → 100644
View file @
d2d47b86
#include <iostream>
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <string>
#include <infiniband/verbs.h>
#include "rdma_helper.hpp"
#include "rdma_states.hpp"
#include "metadata.hpp"
#include "transport_helper.hpp"
int
send_obj
(
struct
resource_base
*
base
,
char
*
obj
,
int
size
)
{
int
op_bytes
=
0
;
op_bytes
=
write
(
base
->
conn_fd
,
obj
,
size
);
if
(
op_bytes
<
size
)
D
(
err_msg
(
"write: Unable to write"
,
false
,
base
);
return
1
);
return
0
;
}
int
read_obj
(
struct
resource_base
*
base
,
char
*
ret_obj
,
int
size
)
{
int
op_bytes
=
0
;
int
tot_bytes
=
0
;
while
(
tot_bytes
<
size
)
{
op_bytes
=
read
(
base
->
conn_fd
,
ret_obj
,
size
);
if
(
op_bytes
==
0
)
break
;
else
if
(
op_bytes
>
0
)
tot_bytes
+=
op_bytes
;
else
break
;
}
if
(
tot_bytes
<
size
)
D
(
err_msg
(
"read; Unable to read"
,
false
,
base
);
return
1
);
return
0
;
}
int
send_and_check
(
struct
resource_base
*
base
,
char
*
snd_obj
,
char
*
ret_obj
,
int
send_size
,
int
ret_size
)
{
if
(
send_obj
(
base
,
snd_obj
,
send_size
))
D
(
err_msg
(
"send_and_check;send_obj"
,
false
,
base
);
return
1
);
if
(
read_obj
(
base
,
ret_obj
,
ret_size
))
D
(
err_msg
(
"send_and_check;read_obj"
,
false
,
base
);
return
1
);
return
0
;
}
Smit_MTP_RamCloud_Replication_Offload/transport_helper.hpp
0 → 100644
View file @
d2d47b86
#include "rdma_helper.hpp"
#ifndef __TRANSPORT_HELPER__
#define __TRANSPORT_HELPER__
// Writes `size` bytes to base->conn_fd; returns 0 on success, 1 on failure.
int send_obj(struct resource_base *base, char *send_obj, int size);

// Reads `size` bytes from base->conn_fd into ret_obj; returns 0 on success,
// 1 on failure.
int read_obj(struct resource_base *base, char *ret_obj, int size);

// send_obj() followed by read_obj(); returns 0 when both succeed, 1
// otherwise.
int send_and_check(struct resource_base *base, char *snd_obj, char *ret_obj,
                   int send_size, int ret_size);
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment