parent
9eab536c14
commit
ca0fbd1819
@ -1,32 +0,0 @@ |
||||
import argparse |
||||
import os |
||||
import random |
||||
import sys |
||||
|
||||
from utils import utils |
||||
|
||||
if __name__ == "__main__": |
||||
parser = argparse.ArgumentParser(description='This script helps you to collect public ips') |
||||
parser.add_argument('--instance_output', type=str, dest='instance_output', |
||||
default='instance_output.txt', |
||||
help='the file contains node_name_tag and region number of created instances.') |
||||
parser.add_argument('--region_config', type=str, |
||||
dest='region_config', default='configuration.txt') |
||||
parser.add_argument('--file_output', type=str, |
||||
dest='file_output', default='raw_ip.txt') |
||||
args = parser.parse_args() |
||||
|
||||
if not args.instance_output or not os.path.isfile(args.instance_output): |
||||
print "%s or %s are not existed" % (args.file_output, args.instance_output) |
||||
sys.exit(1) |
||||
if args.instance_output: |
||||
with open(args.instance_output, "r") as fin, open(args.file_output, "w") as fout: |
||||
for line in fin.readlines(): |
||||
items = line.split(" ") |
||||
region_number = items[1].strip() |
||||
node_name_tag = items[0].strip() |
||||
ip_list = utils.collect_public_ips(region_number, node_name_tag, args.region_config) |
||||
random.shuffle(ip_list) |
||||
for ip in ip_list: |
||||
fout.write(ip + " " + node_name_tag + "\n") |
||||
print "Done collecting public ips %s" % args.file_output |
@ -1 +0,0 @@ |
||||
python collect_public_ips.py --instance_output instance_output.txt --file_output raw_ip.txt |
@ -1,80 +0,0 @@ |
||||
import argparse |
||||
import logging |
||||
import os |
||||
import stat |
||||
import sys |
||||
|
||||
from utils import utils |
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(threadName)s %(asctime)s - %(name)s - %(levelname)s - %(message)s') |
||||
LOGGER = logging.getLogger(__file__) |
||||
LOGGER.setLevel(logging.INFO) |
||||
|
||||
PEMS = [ |
||||
"virginia-key-benchmark.pem", |
||||
"ohio-key-benchmark.pem", |
||||
"california-key-benchmark.pem", |
||||
"oregon-key-benchmark.pem", |
||||
"tokyo-key-benchmark.pem", |
||||
"singapore-key-benchmark.pem", |
||||
"frankfurt-key-benchmark.pem", |
||||
"ireland-key-benchmark.pem", |
||||
] |
||||
|
||||
if __name__ == "__main__": |
||||
parser = argparse.ArgumentParser(description='This script helps you to genereate distribution config') |
||||
parser.add_argument('--distribution_config', type=str, |
||||
dest='distribution_config', default='distribution_config.txt') |
||||
parser.add_argument('--commander_logging', type=str, |
||||
dest='commander_logging', default='commander_logging.sh') |
||||
parser.add_argument('--logs_download', type=str, |
||||
dest='logs_download', default='logs_download.sh') |
||||
parser.add_argument('--commander_info', type=str, |
||||
dest='commander_info', default='commander_info.txt') |
||||
|
||||
args = parser.parse_args() |
||||
|
||||
if not os.path.exists(args.distribution_config): |
||||
sys.exit(1) |
||||
with open(args.distribution_config, "r") as fin: |
||||
lines = fin.readlines() |
||||
|
||||
commander_address = None |
||||
commander_region = None |
||||
commander_output = None |
||||
with open(args.distribution_config, "w") as fout: |
||||
for line in lines: |
||||
if "commander" in line: |
||||
items = [item.strip() for item in line.split(" ")] |
||||
commander_address = items[0] |
||||
commander_region = int(items[4][0]) |
||||
commander_output = "\n".join(items) |
||||
else: |
||||
fout.write(line.strip() + "\n") |
||||
if not commander_address or not commander_region: |
||||
LOGGER.info("Failed to extract commander address and commander region.") |
||||
sys.exit(1) |
||||
|
||||
with open(args.commander_info, "w") as fout: |
||||
fout.write(commander_output) |
||||
|
||||
LOGGER.info("Generated %s" % args.distribution_config) |
||||
LOGGER.info("Generated %s" % args.commander_info) |
||||
with open(args.commander_logging, "w") as fout: |
||||
fout.write("scp -i ../keys/%s %s ec2-user@%s:/tmp/distribution_config.txt\n" % (PEMS[commander_region - 1], args.distribution_config, commander_address)) |
||||
fout.write("scp -i ../keys/%s %s ec2-user@%s:/tmp/commander_info.txt\n" % (PEMS[commander_region - 1], args.commander_info, commander_address)) |
||||
fout.write("if [ $? -eq 0 ]; then\n\t") |
||||
fout.write("ssh -i ../keys/%s ec2-user@%s\n" % (PEMS[commander_region - 1], commander_address)) |
||||
fout.write("else\n\techo \"Failed to send %s to the commander machine\"\nfi\n" % args.distribution_config) |
||||
st = os.stat(args.commander_logging) |
||||
os.chmod(args.commander_logging, st.st_mode | stat.S_IEXEC) |
||||
LOGGER.info("Generated %s" % args.commander_logging) |
||||
|
||||
with open(args.logs_download, "w") as fout: |
||||
fout.write("scp -i ../keys/%s ec2-user@%s:~/projects/src/harmony-benchmark/bin/upload tmp/\n" % (PEMS[commander_region - 1], commander_address)) |
||||
st = os.stat(args.logs_download) |
||||
os.chmod(args.logs_download, st.st_mode | stat.S_IEXEC) |
||||
LOGGER.info("Generated %s" % args.logs_download) |
||||
|
||||
LOGGER.info("DONE.") |
||||
|
@ -1,426 +0,0 @@ |
||||
import boto3 |
||||
import argparse |
||||
import sys |
||||
import json |
||||
import time |
||||
import datetime |
||||
from threading import Thread |
||||
from Queue import Queue |
||||
import base64 |
||||
|
||||
|
||||
class InstanceResource: |
||||
ON_DEMAND = 1 |
||||
SPOT_INSTANCE = 2 |
||||
SPOT_FLEET = 3 |
||||
|
||||
|
||||
REGION_NAME = 'region_name' |
||||
REGION_KEY = 'region_key' |
||||
REGION_SECURITY_GROUP = 'region_security_group' |
||||
REGION_SECURITY_GROUP_ID = 'region_security_group_id' |
||||
REGION_HUMAN_NAME = 'region_human_name' |
||||
INSTANCE_TYPE = 't2.micro' |
||||
REGION_AMI = 'region_ami' |
||||
with open("user-data.sh", "r") as userdata_file: |
||||
USER_DATA = userdata_file.read() |
||||
|
||||
# UserData must be base64 encoded for spot instances. |
||||
USER_DATA_BASE64 = base64.b64encode(USER_DATA) |
||||
|
||||
IAM_INSTANCE_PROFILE = 'BenchMarkCodeDeployInstanceProfile' |
||||
REPO = "simple-rules/harmony-benchmark" |
||||
APPLICATION_NAME = 'benchmark-experiments' |
||||
time_stamp = time.time() |
||||
CURRENT_SESSION = datetime.datetime.fromtimestamp( |
||||
time_stamp).strftime('%H-%M-%S-%Y-%m-%d') |
||||
PLACEMENT_GROUP = "PLACEMENT-" + CURRENT_SESSION |
||||
NODE_NAME_SUFFIX = "NODE-" + CURRENT_SESSION |
||||
|
||||
|
||||
def create_launch_specification(region_number, instanceType): |
||||
NODE_NAME = region_number + "-" + NODE_NAME_SUFFIX |
||||
return { |
||||
# Region irrelevant fields |
||||
'IamInstanceProfile': { |
||||
'Name': IAM_INSTANCE_PROFILE |
||||
}, |
||||
'InstanceType': instanceType, |
||||
'UserData': USER_DATA_BASE64, |
||||
# Region relevant fields |
||||
'SecurityGroups': [ |
||||
{ |
||||
# In certain scenarios, we have to use group id instead of group name |
||||
# https://github.com/boto/boto/issues/350#issuecomment-27359492 |
||||
'GroupId': config[region_number][REGION_SECURITY_GROUP_ID] |
||||
} |
||||
], |
||||
'ImageId': config[region_number][REGION_AMI], |
||||
'KeyName': config[region_number][REGION_KEY], |
||||
'UserData': USER_DATA_BASE64, |
||||
'TagSpecifications': [ |
||||
{ |
||||
'ResourceType': 'instance', |
||||
'Tags': [ |
||||
{ |
||||
'Key': 'Name', |
||||
'Value': NODE_NAME |
||||
} |
||||
] |
||||
} |
||||
], |
||||
# 'WeightedCapacity': 123.0, |
||||
# 'Placement': { |
||||
# # 'AvailabilityZone': get_one_availability_zone(ec2_client) |
||||
# } |
||||
} |
||||
|
||||
|
||||
def create_launch_specification_list(region_number, instance_type_list): |
||||
return list(map(lambda type: create_launch_specification(region_number, type), instance_type_list)) |
||||
|
||||
|
||||
""" |
||||
TODO: |
||||
|
||||
Build (argparse,functions) support for |
||||
1. run only create instance (multiple times) |
||||
2. run only codedeploy (multiple times) |
||||
3. run create instance followed by codedeploy |
||||
|
||||
""" |
||||
|
||||
### CREATE INSTANCES ### |
||||
|
||||
|
||||
def run_one_region_instances(config, region_number, number_of_instances, instance_resource=InstanceResource.ON_DEMAND): |
||||
#todo: explore the use ec2 resource and not client. e.g. create_instances -- Might make for better code. |
||||
""" |
||||
e.g. ec2.create_instances |
||||
""" |
||||
region_name = config[region_number][REGION_NAME] |
||||
session = boto3.Session(region_name=region_name) |
||||
ec2_client = session.client('ec2') |
||||
if instance_resource == InstanceResource.ON_DEMAND: |
||||
NODE_NAME = create_instances( |
||||
config, ec2_client, region_number, int(number_of_instances)) |
||||
# REPLACE ALL print with logger |
||||
print("Created %s in region %s" % (NODE_NAME, region_number)) |
||||
elif instance_resource == InstanceResource.SPOT_INSTANCE: |
||||
response = request_spot_instances( |
||||
config, ec2_client, region_number, int(number_of_instances)) |
||||
else: |
||||
instance_type_list = ['t2.micro', 't2.small', 'm3.medium'] |
||||
response = request_spot_fleet( |
||||
config, ec2_client, region_number, int(number_of_instances), instance_type_list) |
||||
return |
||||
return session |
||||
|
||||
|
||||
def create_instances(config, ec2_client, region_number, number_of_instances): |
||||
NODE_NAME = region_number + "-" + NODE_NAME_SUFFIX |
||||
response = ec2_client.run_instances( |
||||
MinCount=number_of_instances, |
||||
MaxCount=number_of_instances, |
||||
ImageId=config[region_number][REGION_AMI], |
||||
Placement={ |
||||
'AvailabilityZone': get_one_availability_zone(ec2_client), |
||||
}, |
||||
SecurityGroups=[config[region_number][REGION_SECURITY_GROUP]], |
||||
IamInstanceProfile={ |
||||
'Name': IAM_INSTANCE_PROFILE |
||||
}, |
||||
KeyName=config[region_number][REGION_KEY], |
||||
UserData=USER_DATA, |
||||
InstanceType=INSTANCE_TYPE, |
||||
TagSpecifications=[ |
||||
{ |
||||
'ResourceType': 'instance', |
||||
'Tags': [ |
||||
{ |
||||
'Key': 'Name', |
||||
'Value': NODE_NAME |
||||
}, |
||||
] |
||||
}, |
||||
], |
||||
# We can also request spot instances this way but this way will block the |
||||
# process until spot requests are fulfilled, otherwise it will throw exception |
||||
# after 4 failed re-try. |
||||
# InstanceMarketOptions= { |
||||
# 'MarketType': 'spot', |
||||
# 'SpotOptions': { |
||||
# 'SpotInstanceType': 'one-time', |
||||
# 'BlockDurationMinutes': 60, |
||||
# } |
||||
# } |
||||
) |
||||
return NODE_NAME |
||||
|
||||
|
||||
def request_spot_instances(config, ec2_client, region_number, number_of_instances): |
||||
NODE_NAME = region_number + "-" + NODE_NAME_SUFFIX |
||||
response = ec2_client.request_spot_instances( |
||||
# DryRun=True, |
||||
BlockDurationMinutes=60, |
||||
InstanceCount=number_of_instances, |
||||
LaunchSpecification={ |
||||
'SecurityGroups': [config[region_number][REGION_SECURITY_GROUP]], |
||||
'IamInstanceProfile': { |
||||
'Name': IAM_INSTANCE_PROFILE |
||||
}, |
||||
'UserData': USER_DATA_BASE64, |
||||
'ImageId': config[region_number][REGION_AMI], |
||||
'InstanceType': INSTANCE_TYPE, |
||||
'KeyName': config[region_number][REGION_KEY], |
||||
'Placement': { |
||||
'AvailabilityZone': get_one_availability_zone(ec2_client) |
||||
} |
||||
} |
||||
) |
||||
return response |
||||
|
||||
|
||||
def request_spot_fleet(config, ec2_client, region_number, number_of_instances, instance_type_list): |
||||
NODE_NAME = region_number + "-" + NODE_NAME_SUFFIX |
||||
# https://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.Client.request_spot_fleet |
||||
response = ec2_client.request_spot_fleet( |
||||
# DryRun=True, |
||||
SpotFleetRequestConfig={ |
||||
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-fleet.html#spot-fleet-allocation-strategy |
||||
'AllocationStrategy': 'diversified', |
||||
'IamFleetRole': 'arn:aws:iam::656503231766:role/RichardFleetRole', |
||||
'LaunchSpecifications': create_launch_specification_list(region_number, instance_type_list), |
||||
# 'SpotPrice': 'string', # The maximum price per unit hour that you are willing to pay for a Spot Instance. The default is the On-Demand price. |
||||
'TargetCapacity': number_of_instances, |
||||
'OnDemandTargetCapacity': 0, |
||||
'Type': 'maintain' |
||||
} |
||||
) |
||||
return response |
||||
|
||||
|
||||
def get_availability_zones(ec2_client): |
||||
response = ec2_client.describe_availability_zones() |
||||
all_zones = [] |
||||
if response.get('AvailabilityZones', None): |
||||
region_info = response.get('AvailabilityZones') |
||||
for info in region_info: |
||||
if info['State'] == 'available': |
||||
all_zones.append(info['ZoneName']) |
||||
return all_zones |
||||
|
||||
|
||||
def get_one_availability_zone(ec2_client): |
||||
all_zones = get_availability_zones(ec2_client) |
||||
if len(all_zones) > 0: |
||||
return all_zones[0] |
||||
else: |
||||
print("No availability zone for this region") |
||||
sys.exit() |
||||
|
||||
#### CODEDEPLOY ### |
||||
|
||||
|
||||
def run_one_region_codedeploy(region_number, commitId): |
||||
#todo: explore the use ec2 resource and not client. e.g. create_instances -- Might make for better code. |
||||
""" |
||||
for getting instance ids:--- |
||||
ec2 = boto3.resource('ec2', region_name=region_name]) |
||||
result = ec2.instances.filter(Filters=[{'Name': 'instance-state-name', 'Values': ['running']}]) |
||||
for instance in result: |
||||
instances.append(instance.id) |
||||
|
||||
for getting public ips : -- |
||||
ec2 = boto3.resource('ec2') |
||||
instance |
||||
""" |
||||
region_name = config[region_number][REGION_NAME] |
||||
NODE_NAME = region_number + "-" + NODE_NAME_SUFFIX |
||||
session = boto3.Session(region_name=region_name) |
||||
ec2_client = session.client('ec2') |
||||
filters = [{'Name': 'tag:Name', 'Values': [NODE_NAME]}] |
||||
instance_ids = get_instance_ids( |
||||
ec2_client.describe_instances(Filters=filters)) |
||||
|
||||
print("Number of instances: %d" % len(instance_ids)) |
||||
|
||||
print("Waiting for all %d instances in region %s to start running" % |
||||
(len(instance_ids), region_number)) |
||||
waiter = ec2_client.get_waiter('instance_running') |
||||
waiter.wait(InstanceIds=instance_ids) |
||||
|
||||
print("Waiting for all %d instances in region %s to be INSTANCE STATUS OK" % ( |
||||
len(instance_ids), region_number)) |
||||
waiter = ec2_client.get_waiter('instance_status_ok') |
||||
waiter.wait(InstanceIds=instance_ids) |
||||
|
||||
print("Waiting for all %d instances in region %s to be SYSTEM STATUS OK" % |
||||
(len(instance_ids), region_number)) |
||||
waiter = ec2_client.get_waiter('system_status_ok') |
||||
waiter.wait(InstanceIds=instance_ids) |
||||
|
||||
codedeploy = session.client('codedeploy') |
||||
application_name = APPLICATION_NAME |
||||
deployment_group = APPLICATION_NAME + "-" + \ |
||||
commitId[:6] + "-" + CURRENT_SESSION |
||||
repo = REPO |
||||
|
||||
print("Setting up to deploy commitId %s on region %s" % |
||||
(commitId, region_number)) |
||||
response = get_application(codedeploy, application_name) |
||||
deployment_group = get_deployment_group( |
||||
codedeploy, region_number, application_name, deployment_group) |
||||
depId = deploy(codedeploy, application_name, |
||||
deployment_group, repo, commitId) |
||||
return region_number, depId |
||||
|
||||
|
||||
def get_deployment_group(codedeploy, region_number, application_name, deployment_group): |
||||
NODE_NAME = region_number + "-" + NODE_NAME_SUFFIX |
||||
response = codedeploy.create_deployment_group( |
||||
applicationName=application_name, |
||||
deploymentGroupName=deployment_group, |
||||
deploymentConfigName='CodeDeployDefault.AllAtOnce', |
||||
serviceRoleArn='arn:aws:iam::656503231766:role/BenchMarkCodeDeployServiceRole', |
||||
deploymentStyle={ |
||||
'deploymentType': 'IN_PLACE', |
||||
'deploymentOption': 'WITHOUT_TRAFFIC_CONTROL' |
||||
}, |
||||
ec2TagFilters=[ |
||||
{ |
||||
'Key': 'Name', |
||||
'Value': NODE_NAME, |
||||
'Type': 'KEY_AND_VALUE' |
||||
} |
||||
] |
||||
) |
||||
return deployment_group |
||||
|
||||
|
||||
def get_application(codedeploy, application_name): |
||||
response = codedeploy.list_applications() |
||||
if application_name in response['applications']: |
||||
return response |
||||
else: |
||||
response = codedeploy.create_application( |
||||
applicationName=application_name, |
||||
computePlatform='Server' |
||||
) |
||||
return response |
||||
|
||||
|
||||
def deploy(codedeploy, application_name, deployment_group, repo, commitId): |
||||
"""Deploy new code at specified revision to instance. |
||||
|
||||
arguments: |
||||
- repo: GitHub repository path from which to get the code |
||||
- commitId: commit ID to be deployed |
||||
- wait: wait until the CodeDeploy finishes |
||||
""" |
||||
print("Launching CodeDeploy with commit " + commitId) |
||||
res = codedeploy.create_deployment( |
||||
applicationName=application_name, |
||||
deploymentGroupName=deployment_group, |
||||
deploymentConfigName='CodeDeployDefault.AllAtOnce', |
||||
description='benchmark experiments', |
||||
revision={ |
||||
'revisionType': 'GitHub', |
||||
'gitHubLocation': { |
||||
'repository': repo, |
||||
'commitId': commitId, |
||||
} |
||||
} |
||||
) |
||||
depId = res["deploymentId"] |
||||
print("Deployment ID: " + depId) |
||||
# The deployment is launched at this point, so exit unless asked to wait |
||||
# until it finishes |
||||
info = {'status': 'Created'} |
||||
start = time.time() |
||||
while info['status'] not in ('Succeeded', 'Failed', 'Stopped',) and (time.time() - start < 600.0): |
||||
info = codedeploy.get_deployment(deploymentId=depId)['deploymentInfo'] |
||||
print(info['status']) |
||||
time.sleep(15) |
||||
if info['status'] == 'Succeeded': |
||||
print("\nDeploy Succeeded") |
||||
return depId |
||||
else: |
||||
print("\nDeploy Failed") |
||||
print(info) |
||||
return depId |
||||
|
||||
|
||||
def run_one_region_codedeploy_wrapper(region_number, commitId, queue): |
||||
region_number, depId = run_one_region_codedeploy(region_number, commitId) |
||||
queue.put((region_number, depId)) |
||||
|
||||
|
||||
def launch_code_deploy(region_list, commitId): |
||||
queue = Queue() |
||||
jobs = [] |
||||
for i in range(len(region_list)): |
||||
region_number = region_list[i] |
||||
my_thread = Thread(target=run_one_region_codedeploy_wrapper, args=( |
||||
region_number, commitId, queue)) |
||||
my_thread.start() |
||||
jobs.append(my_thread) |
||||
for my_thread in jobs: |
||||
my_thread.join() |
||||
results = [queue.get() for job in jobs] |
||||
return results |
||||
|
||||
##### UTILS #### |
||||
|
||||
|
||||
def get_instance_ids(describe_instances_response): |
||||
instance_ids = [] |
||||
for reservation in describe_instances_response["Reservations"]: |
||||
for instance in reservation["Instances"]: |
||||
instance_ids.append(instance["InstanceId"]) |
||||
return instance_ids |
||||
|
||||
|
||||
def read_configuration_file(filename): |
||||
config = {} |
||||
with open(filename, 'r') as f: |
||||
for myline in f: |
||||
mylist = myline.strip().split(',') |
||||
region_num = mylist[0] |
||||
config[region_num] = {} |
||||
config[region_num][REGION_NAME] = mylist[1] |
||||
config[region_num][REGION_KEY] = mylist[2] |
||||
config[region_num][REGION_SECURITY_GROUP] = mylist[3] |
||||
config[region_num][REGION_HUMAN_NAME] = mylist[4] |
||||
config[region_num][REGION_AMI] = mylist[5] |
||||
config[region_num][REGION_SECURITY_GROUP_ID] = mylist[6] |
||||
return config |
||||
|
||||
##### UTILS #### |
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
parser = argparse.ArgumentParser( |
||||
description='This script helps you start instances across multiple regions') |
||||
parser.add_argument('--regions', type=str, dest='regions', |
||||
default='3', help="Supply a csv list of all regions") |
||||
parser.add_argument('--instances', type=str, dest='numInstances', |
||||
default='1', help='number of instances') |
||||
parser.add_argument('--configuration', type=str, |
||||
dest='config', default='configuration.txt') |
||||
parser.add_argument('--commitId', type=str, dest='commitId', |
||||
default='1f7e6e7ca7cf1c1190cedec10e791c01a29971cf') |
||||
args = parser.parse_args() |
||||
config = read_configuration_file(args.config) |
||||
region_list = args.regions.split(',') |
||||
instances_list = args.numInstances.split(',') |
||||
assert len(region_list) == len(instances_list), "number of regions: %d != number of instances per region: %d" % ( |
||||
len(region_list), len(instances_list)) |
||||
commitId = args.commitId |
||||
for i in range(len(region_list)): |
||||
region_number = region_list[i] |
||||
number_of_instances = instances_list[i] |
||||
session = run_one_region_instances( |
||||
config, region_number, number_of_instances, InstanceResource.SPOT_FLEET) |
||||
results = launch_code_deploy(region_list, commitId) |
||||
print(results) |
@ -1,23 +0,0 @@ |
||||
if [ $# -lt 3 ]; then |
||||
echo "Please provide # of instances, # of shards, # of clients" |
||||
exit 1 |
||||
fi |
||||
|
||||
INSTANCE_NUM=$1 |
||||
SHARD_NUM=$2 |
||||
CLIENT_NUM=$3 |
||||
|
||||
echo "Creating $INSTANCE_NUM instances at 8 regions" |
||||
python create_instances.py --regions 1,2,3,4,5,6,7,8 --instances $INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM |
||||
|
||||
sleep 40 |
||||
echo "Rung collecint raw ips" |
||||
python collect_public_ips.py --instance_output instance_output.txt |
||||
|
||||
sleep 10 |
||||
echo "Generate distribution_config" |
||||
python generate_distribution_config.py --ip_list_file raw_ip.txt --shard_num $SHARD_NUM --client_num $CLIENT_NUM |
||||
|
||||
sleep 20 |
||||
echo "Deploy" |
||||
python deploy.py |
@ -1,15 +0,0 @@ |
||||
INSTANCE_NUM=1 |
||||
SHARD_NUM=1 |
||||
CLIENT_NUM=1 |
||||
|
||||
# echo "Creating $INSTANCE_NUM instances at 8 regions" |
||||
# python create_instances.py --regions 1,2,3,4,5,6,7,8 --instances $INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM |
||||
|
||||
echo "Rung collecint raw ips" |
||||
python collect_public_ips.py --instance_output instance_output.txt |
||||
|
||||
echo "Generate distribution_config" |
||||
python generate_distribution_config.py --ip_list_file raw_ip.txt --shard_num $SHARD_NUM --client_num $CLIENT_NUM |
||||
|
||||
echo "Deploy" |
||||
python deploy.py |
@ -1,155 +0,0 @@ |
||||
import argparse |
||||
import base64 |
||||
import boto3 |
||||
import datetime |
||||
import json |
||||
import sys |
||||
import threading |
||||
import time |
||||
import enum |
||||
|
||||
from utils import utils, spot_fleet, logger |
||||
|
||||
LOGGER = logger.getLogger(__file__) |
||||
|
||||
|
||||
class InstanceResource(enum.Enum): |
||||
ON_DEMAND = 'ondemand' |
||||
SPOT_INSTANCE = 'spot' |
||||
SPOT_FLEET = 'fleet' |
||||
|
||||
def __str__(self): |
||||
return self.value |
||||
|
||||
def run_one_region_on_demand_instances(config, region_number, number_of_instances, tag): |
||||
ec2_client = utils.create_client(config, region_number) |
||||
node_name_tag = create_instances( |
||||
config, ec2_client, region_number, number_of_instances, tag) |
||||
LOGGER.info("Created %s in region %s" % (node_name_tag, region_number)) |
||||
return node_name_tag, ec2_client |
||||
|
||||
def create_instances(config, ec2_client, region_number, number_of_instances, tag): |
||||
node_name_tag = utils.get_node_name_tag2(region_number, tag) |
||||
LOGGER.info("Creating node_name_tag: %s" % node_name_tag) |
||||
available_zone = utils.get_one_availability_zone(ec2_client) |
||||
LOGGER.info("Looking at zone %s to create instances." % available_zone) |
||||
|
||||
time.sleep(2) |
||||
ec2_client.run_instances( |
||||
MinCount=number_of_instances, |
||||
MaxCount=number_of_instances, |
||||
ImageId=config[region_number][utils.REGION_AMI], |
||||
Placement={ |
||||
'AvailabilityZone': available_zone, |
||||
}, |
||||
SecurityGroups=[config[region_number][utils.REGION_SECURITY_GROUP]], |
||||
IamInstanceProfile={ |
||||
'Name': utils.IAM_INSTANCE_PROFILE |
||||
}, |
||||
KeyName=config[region_number][utils.REGION_KEY], |
||||
UserData=utils.USER_DATA, |
||||
InstanceType=utils.INSTANCE_TYPE, |
||||
TagSpecifications=[ |
||||
{ |
||||
'ResourceType': 'instance', |
||||
'Tags': [ |
||||
{ |
||||
'Key': 'Name', |
||||
'Value': node_name_tag |
||||
}, |
||||
] |
||||
}, |
||||
], |
||||
) |
||||
time.sleep(30) |
||||
instance_ids = utils.get_instance_ids2(ec2_client, node_name_tag) |
||||
LOGGER.info("Waiting for all %d instances in region %s with node_name_tag %s to be in RUNNING" % ( |
||||
len(instance_ids), region_number, node_name_tag)) |
||||
time.sleep(20) |
||||
waiter = ec2_client.get_waiter('instance_running') |
||||
waiter.wait(InstanceIds=instance_ids) |
||||
|
||||
count = 0 |
||||
while count < 40: |
||||
time.sleep(10) |
||||
LOGGER.info("Waiting ...") |
||||
ip_list = utils.collect_public_ips_from_ec2_client( |
||||
ec2_client, node_name_tag) |
||||
if len(ip_list) == number_of_instances: |
||||
LOGGER.info("Created %d instances" % number_of_instances) |
||||
return node_name_tag |
||||
count = count + 1 |
||||
LOGGER.info("Can not create %d instances" % number_of_instances) |
||||
return None |
||||
|
||||
|
||||
LOCK_FOR_RUN_ONE_REGION = threading.Lock() |
||||
|
||||
def run_for_one_region_on_demand(config, region_number, number_of_instances, fout, fout2): |
||||
tag = 0 |
||||
number_of_instances = int(number_of_instances) |
||||
while number_of_instances > 0: |
||||
number_of_creation = min(utils.MAX_INSTANCES_FOR_DEPLOYMENT, number_of_instances) |
||||
node_name_tag, ec2_client = run_one_region_on_demand_instances( |
||||
config, region_number, number_of_creation, tag) |
||||
if node_name_tag: |
||||
LOGGER.info("Managed to create instances for region %s with name_name_tag %s" % |
||||
(region_number, node_name_tag)) |
||||
instance_ids = utils.get_instance_ids2(ec2_client, node_name_tag) |
||||
LOCK_FOR_RUN_ONE_REGION.acquire() |
||||
try: |
||||
fout.write("%s %s\n" % (node_name_tag, region_number)) |
||||
for instance_id in instance_ids: |
||||
fout2.write(instance_id + " " + node_name_tag + " " + region_number + |
||||
" " + config[region_number][utils.REGION_NAME] + "\n") |
||||
finally: |
||||
LOCK_FOR_RUN_ONE_REGION.release() |
||||
else: |
||||
LOGGER.info("Failed to create instances for region %s" % region_number) |
||||
number_of_instances -= number_of_creation |
||||
tag += 1 |
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
parser = argparse.ArgumentParser( |
||||
description='This script helps you start instances across multiple regions') |
||||
parser.add_argument('--regions', type=str, dest='regions', |
||||
default='3', help="Supply a csv list of all regions") |
||||
parser.add_argument('--instances', type=str, dest='num_instance_list', |
||||
default='1', help='number of instances in respective of region') |
||||
parser.add_argument('--region_config', type=str, |
||||
dest='region_config', default='configuration.txt') |
||||
parser.add_argument('--instance_output', type=str, dest='instance_output', |
||||
default='instance_output.txt', help='the file to append or write') |
||||
parser.add_argument('--instance_ids_output', type=str, dest='instance_ids_output', |
||||
default='instance_ids_output.txt', help='the file to append or write') |
||||
parser.add_argument('--instance_resource', dest='instance_resource', type=InstanceResource, |
||||
default=InstanceResource.ON_DEMAND, choices=list(InstanceResource)) |
||||
parser.add_argument('--append', dest='append', type=bool, default=False, |
||||
help='append to the current instance_output') |
||||
args = parser.parse_args() |
||||
config = utils.read_region_config(args.region_config) |
||||
region_list = args.regions.split(',') |
||||
num_instance_list = args.num_instance_list.split(',') |
||||
instance_resource = args.instance_resource |
||||
assert len(region_list) == len(num_instance_list), "number of regions: %d != number of instances per region: %d" % ( |
||||
len(region_list), len(num_instance_list)) |
||||
|
||||
write_mode = "a" if args.append else "w" |
||||
with open(args.instance_output, write_mode) as fout, open(args.instance_ids_output, write_mode) as fout2: |
||||
thread_pool = [] |
||||
for i in range(len(region_list)): |
||||
region_number = region_list[i] |
||||
number_of_instances = num_instance_list[i] |
||||
if instance_resource == InstanceResource.ON_DEMAND: |
||||
t = threading.Thread(target=run_for_one_region_on_demand, args=( |
||||
config, region_number, number_of_instances, fout, fout2)) |
||||
elif instance_resource == InstanceResource.SPOT_FLEET: |
||||
t = threading.Thread(target=spot_fleet.run_one_region, args=( |
||||
region_number, number_of_instances, fout, fout2)) |
||||
LOGGER.info("creating thread for region %s" % region_number) |
||||
t.start() |
||||
thread_pool.append(t) |
||||
for t in thread_pool: |
||||
t.join() |
||||
LOGGER.info("done.") |
@ -1 +0,0 @@ |
||||
python create_instances.py --regions 1,3 --instances 3,3 |
@ -1,7 +0,0 @@ |
||||
if [ $# -eq 0 ]; then |
||||
echo "Please provide # of instances" |
||||
exit 1 |
||||
fi |
||||
INSTANCE_NUM=$1 |
||||
echo "Creating $INSTANCE_NUM instances at 8 regions" |
||||
python create_instances.py --regions 1,2,3,4,5,6,7,8 --instances $INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM |
@ -1,6 +0,0 @@ |
||||
#This script is used for debugging and testing as we only created 2 instances. |
||||
#Be aware that the default output will be instance_output_2.txt |
||||
INSTANCE_NUM=2 |
||||
|
||||
echo "Creating $$INSTANCE_NUM instances at 8 regions" |
||||
python create_instances.py --regions 1,2,3,4,5,6,7,8 --instances $INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM --instance_output instance_output_2.txt |
@ -1,4 +0,0 @@ |
||||
INSTANCE_NUM=50 |
||||
|
||||
echo "Creating $INSTANCE_NUM instances at 8 regions" |
||||
python create_instances.py --regions 1,2,3,4,5,6,7,8 --instances $INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM,$INSTANCE_NUM --instance_output instance_output_50.txt |
@ -1,171 +0,0 @@ |
||||
import argparse |
||||
import base64 |
||||
import boto3 |
||||
import datetime |
||||
import json |
||||
import logging |
||||
import os |
||||
import subprocess |
||||
import sys |
||||
import threading |
||||
import time |
||||
|
||||
from utils import utils |
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(threadName)s %(asctime)s - %(name)s - %(levelname)s - %(message)s') |
||||
LOGGER = logging.getLogger(__file__) |
||||
LOGGER.setLevel(logging.INFO) |
||||
|
||||
class InstanceResource: |
||||
ON_DEMAND = 1 |
||||
SPOT_INSTANCE = 2 |
||||
SPOT_FLEET = 3 |
||||
|
||||
with open("user-data.sh", "r") as userdata_file: |
||||
USER_DATA = userdata_file.read() |
||||
|
||||
# UserData must be base64 encoded for spot instances. |
||||
USER_DATA_BASE64 = base64.b64encode(USER_DATA) |
||||
|
||||
IAM_INSTANCE_PROFILE = 'BenchMarkCodeDeployInstanceProfile' |
||||
REPO = "simple-rules/harmony-benchmark" |
||||
APPLICATION_NAME = 'benchmark-experiments' |
||||
time_stamp = time.time() |
||||
CURRENT_SESSION = datetime.datetime.fromtimestamp( |
||||
time_stamp).strftime('%H-%M-%S-%Y-%m-%d') |
||||
PLACEMENT_GROUP = "PLACEMENT-" + CURRENT_SESSION |
||||
NODE_NAME_SUFFIX = "NODE-" + CURRENT_SESSION |
||||
|
||||
|
||||
def run_one_region_codedeploy(region_number, region_config, node_name_tag_list, commit_id):
    """Wait for every tagged instance in one region to be healthy, then deploy.

    arguments:
    - region_number: region id as it appears in the region config file
    - region_config: path of the region configuration file
    - node_name_tag_list: Name tag values identifying the instances to target
    - commit_id: git commit to hand to CodeDeploy
    """
    ec2_client, session = utils.create_ec2_client(region_number, region_config)
    # One CodeDeploy client serves every node tag in this region; the
    # original recreated it on each loop iteration.
    codedeploy = session.client('codedeploy')
    for node_name_tag in node_name_tag_list:
        filters = [{'Name': 'tag:Name', 'Values': [node_name_tag]}]
        instance_ids = utils.get_instance_ids(ec2_client.describe_instances(Filters=filters))

        LOGGER.info("Number of instances: %d" % len(instance_ids))

        # An instance must pass all three EC2 health gates before the
        # CodeDeploy agent on it is reachable.
        LOGGER.info("Waiting for %d instances in region %s to be in RUNNING" % (len(instance_ids), region_number))
        utils.run_waiter_for_status(ec2_client, 'instance_running', instance_ids)

        LOGGER.info("Waiting for %d instances in region %s with status OK" % (len(instance_ids), region_number))
        utils.run_waiter_for_status(ec2_client, 'instance_status_ok', instance_ids)

        LOGGER.info("Waiting for %d instances in region %s with system in OK" % (len(instance_ids), region_number))
        utils.run_waiter_for_status(ec2_client, 'system_status_ok', instance_ids)

        application_name = APPLICATION_NAME
        # Group name embeds the commit prefix, node tag and session stamp so
        # repeated runs never collide.
        deployment_group_name = APPLICATION_NAME + "-" + commit_id[:6] + "-" + node_name_tag + "-" + CURRENT_SESSION
        repo = REPO

        LOGGER.info("Setting up to deploy commit_id %s on region %s" % (commit_id, region_number))
        utils.get_application(codedeploy, application_name)
        deployment_group_id = utils.create_deployment_group(
            codedeploy, region_number, application_name, deployment_group_name, node_name_tag)
        if deployment_group_id:
            LOGGER.info("Created deployment group with id %s" % deployment_group_id)
        else:
            # Log message fixed: it previously read "Created deployment group
            # with name %s was created".
            LOGGER.info("Deployment group with name %s was created" % deployment_group_name)
        deployment_id, status = deploy(codedeploy, application_name, deployment_group_name, repo, commit_id)
        LOGGER.info("Done with deployment with id: %s and status: %s" % (deployment_id, status))
||||
|
||||
|
||||
def deploy(codedeploy, application_name, deployment_group, repo, commit_id,
           timeout=600, poll_interval=2):
    """Deploy new code at specified revision to instance.

    arguments:
    - codedeploy: boto3 CodeDeploy client
    - application_name: CodeDeploy application to deploy under
    - deployment_group: deployment group name
    - repo: GitHub repository path from which to get the code
    - commit_id: commit ID to be deployed
    - timeout: seconds to wait for the deployment to finish (default 600;
      generalized from a hard-coded constant)
    - poll_interval: seconds between status polls (default 2)

    Returns (deployment_id, status). (None, None) means the deployment could
    not be created; a None status means the wait timed out.
    """
    LOGGER.info("Launching CodeDeploy with commit " + commit_id)
    response = codedeploy.create_deployment(
        applicationName=application_name,
        deploymentGroupName=deployment_group,
        deploymentConfigName='CodeDeployDefault.AllAtOnce',
        description='benchmark experiments',
        revision={
            'revisionType': 'GitHub',
            'gitHubLocation': {
                'repository': repo,
                'commitId': commit_id,
            }
        }
    )
    if response:
        LOGGER.info("Deployment returned with deployment id: " + response["deploymentId"])
        deployment_id = response["deploymentId"]
    else:
        LOGGER.error("Deployment failed.")
        return None, None
    time.sleep(1)
    start_time = time.time()
    status = None
    # Poll until the deployment reaches a terminal state or the timeout
    # elapses; terminal states are the three CodeDeploy end statuses.
    while time.time() - start_time < timeout:
        response = codedeploy.get_deployment(deploymentId=deployment_id)
        if response and response.get('deploymentInfo'):
            status = response['deploymentInfo']['status']
            if status in ('Succeeded', 'Failed', 'Stopped'):
                break
        time.sleep(poll_interval)
    if status:
        LOGGER.info("Deployment group %s finished with status %s" % (deployment_group, status))
    else:
        LOGGER.info("Deployment status: time out")
    return deployment_id, status
||||
|
||||
def launch_code_deploy(region_list, region_config, commit_id):
    """Group node tags by region and run one deployment thread per region.

    arguments:
    - region_list: iterable of (node_name_tag, region_number) tuples
    - region_config: path of the region configuration file
    - commit_id: git commit to deploy
    """
    region_collection = {}
    # node_name_tag comes first in each tuple. Unpacking directly also fixes
    # the 'region_tuppple' typo; setdefault replaces the get-or-create dance.
    for node_name_tag, region_number in region_list:
        region_collection.setdefault(region_number, []).append(node_name_tag)
    thread_pool = []
    # Plain dict iteration works on both Python 2 and 3 — iterkeys() is
    # Python-2-only and would crash under Python 3.
    for region_number in region_collection:
        t = threading.Thread(target=run_one_region_codedeploy, args=(
            region_number, region_config, region_collection[region_number], commit_id))
        t.start()
        thread_pool.append(t)
    for t in thread_pool:
        t.join()
    LOGGER.info("Finished.")
||||
|
||||
def get_head_commit_id():
    """Return the stripped HEAD commit hash of the current checkout, or None.

    The original used ``try/finally: return``, which silently swallowed
    *every* exception Popen could raise (including genuine bugs); catch only
    OSError, which signals that git itself is unavailable.
    """
    try:
        process = subprocess.Popen(['git', 'rev-parse', 'HEAD'],
                                   shell=False, stdout=subprocess.PIPE)
        # If the cwd is not a git repo, git writes nothing to stdout and this
        # returns an empty (falsy) value — callers use `or` fallbacks.
        return process.communicate()[0].strip()
    except OSError:
        # git binary missing or not executable.
        return None
||||
|
||||
|
||||
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='This script helps you start instances across multiple regions')
    parser.add_argument('--instance_output', type=str, dest='instance_output',
                        default='instance_output.txt',
                        help='the file contains node_name_tag and region number of created instances.')
    parser.add_argument('--region_config', type=str, dest='region_config', default='configuration.txt')
    parser.add_argument('--commit_id', type=str, dest='commit_id',
                        default='f092d25d7a814622079fe92e9b36e10e46bc0d97')
    args = parser.parse_args()
    LOGGER.info("********* MAKE SURE YOU'RE RUNNING under harmony-benchmark code base *********")
    # Prefer the checkout's HEAD commit; fall back to the supplied commit id.
    commit_id = get_head_commit_id() or args.commit_id

    # Report each failure precisely: the original combined both checks and
    # printed "%s does not exist" even when the problem was a missing commit
    # id, and logged the error at info level.
    if not os.path.isfile(args.instance_output):
        LOGGER.error("%s does not exist" % args.instance_output)
        sys.exit(1)
    if not commit_id:
        LOGGER.error("no commit id could be determined")
        sys.exit(1)

    with open(args.instance_output, "r") as fin:
        # Each line is "<node_name_tag> <region_number>".
        region_list = [line.split(" ") for line in fin.readlines()]
        region_list = [(item[0].strip(), item[1].strip()) for item in region_list]
        launch_code_deploy(region_list, args.region_config, commit_id)
@ -1 +0,0 @@ |
||||
# Deploy the benchmark to every instance listed in instance_output.txt.
python deploy.py --instance_output instance_output.txt
@ -1,37 +0,0 @@ |
||||
import argparse |
||||
import logging |
||||
import sys |
||||
|
||||
from utils import utils |
||||
|
||||
# Module-wide logging configuration; format kept identical to the other
# benchmark scripts so interleaved logs line up.
logging.basicConfig(level=logging.INFO, format='%(threadName)s %(asctime)s - %(name)s - %(levelname)s - %(message)s')
LOGGER = logging.getLogger(__file__)
LOGGER.setLevel(logging.INFO)
||||
|
||||
|
||||
if __name__ == "__main__":
    # Help-string typo fixed ("genereate" -> "generate").
    parser = argparse.ArgumentParser(description='This script helps you to generate distribution config')
    parser.add_argument('--ip_list_file', type=str, dest='ip_list_file',
                        default='raw_ip.txt', help="the file containing available raw ips")
    # If the ip_list_file is None we need to use the region, node_name_tag and
    # region_config to collect raw_ip.
    parser.add_argument('--region', type=str, dest='region_number',
                        default="4", help="region number")
    parser.add_argument('--node_name_tag', type=str,
                        dest='node_name_tag', default='4-NODE-23-36-01-2018-07-05')
    parser.add_argument('--region_config', type=str,
                        dest='region_config', default='configuration.txt')

    parser.add_argument('--shard_number', type=int, dest='shard_number', default=1)
    parser.add_argument('--client_number', type=int, dest='client_number', default=1)
    parser.add_argument('--distribution_config', type=str,
                        dest='distribution_config', default='distribution_config.txt')
    args = parser.parse_args()

    # PEP 8: compare against None with `is`, not `==`.
    if args.ip_list_file is None:
        utils.generate_distribution_config2(
            args.region_number, args.node_name_tag, args.region_config,
            args.shard_number, args.client_number, args.distribution_config)
    else:
        utils.generate_distribution_config3(args.shard_number, args.client_number,
                                            args.ip_list_file, args.distribution_config)
    LOGGER.info("Done writing %s" % args.distribution_config)
@ -1 +0,0 @@ |
||||
# Use the full option names: --shard_num / --client_num only worked through
# argparse's implicit prefix matching, which silently breaks as soon as
# another option shares the prefix.
python generate_distribution_config.py --ip_list_file raw_ip.txt --shard_number 2 --client_number 2
@ -1,68 +0,0 @@ |
||||
import argparse |
||||
import logging |
||||
import os |
||||
import random |
||||
import sys |
||||
import threading |
||||
|
||||
from utils import utils |
||||
|
||||
|
||||
# Module-wide logging: thread name included because terminations run in one
# thread per region (see the thread fan-out in __main__).
logging.basicConfig(level=logging.INFO, format='%(threadName)s %(asctime)s - %(name)s - %(levelname)s - %(message)s')
LOGGER = logging.getLogger(__file__)
LOGGER.setLevel(logging.INFO)
||||
|
||||
def terminate_instances_by_region(region_number, region_config, node_name_tag):
    """Terminate every instance tagged `node_name_tag` in one region and block
    until EC2 reports them terminated.

    arguments:
    - region_number: region id as it appears in the region config file
    - region_config: path of the region configuration file
    - node_name_tag: Name tag value identifying the instances to terminate
    """
    ec2_client, _ = utils.create_ec2_client(region_number, region_config)
    filters = [{'Name': 'tag:Name', 'Values': [node_name_tag]}]
    instance_ids = utils.get_instance_ids(ec2_client.describe_instances(Filters=filters))
    if instance_ids:
        ec2_client.terminate_instances(InstanceIds=instance_ids)
        LOGGER.info("waiting until instances with tag %s died." % node_name_tag)
        waiter = ec2_client.get_waiter('instance_terminated')
        waiter.wait(InstanceIds=instance_ids)
        LOGGER.info("instances with node name tag %s terminated." % node_name_tag)
    else:
        # Dropped a stray `pass` before this line; warning() is the
        # non-deprecated spelling of warn().
        LOGGER.warning("there is no instances to terminate")
||||
|
||||
if __name__ == "__main__":
    # Help text fixed: it was copy-pasted from collect_public_ips.py
    # ("collect public ips") although this script terminates instances.
    parser = argparse.ArgumentParser(description='This script helps you to terminate instances')
    parser.add_argument('--instance_output', type=str, dest='instance_output',
                        default='instance_output.txt',
                        help='the file contains node_name_tag and region number of created instances.')
    parser.add_argument('--node_name_tag', type=str, dest='node_name_tag')
    parser.add_argument('--region_number', type=str, dest='region_number')
    parser.add_argument('--region_config', type=str,
                        dest='region_config', default='configuration.txt')
    args = parser.parse_args()

    if not args.instance_output or not os.path.isfile(args.instance_output):
        # Message grammar fixed ("is not existed").
        LOGGER.info("%s does not exist" % args.instance_output)
        sys.exit(1)

    def _terminate_all(pairs, region_config):
        # One thread per (region_number, node_name_tag) pair; join them all.
        # Extracted because the original duplicated this fan-out verbatim in
        # both branches below.
        thread_pool = []
        for region_number, node_name_tag in pairs:
            t = threading.Thread(target=terminate_instances_by_region,
                                 args=(region_number, region_config, node_name_tag))
            t.start()
            thread_pool.append(t)
        for t in thread_pool:
            t.join()
        LOGGER.info("done.")

    if args.region_number and args.node_name_tag:
        # zip pairs the comma-separated lists element-wise; extra entries on
        # either side are ignored (the original raised IndexError when
        # --region_number listed more items than --node_name_tag).
        _terminate_all(zip(args.region_number.split(","),
                           args.node_name_tag.split(",")),
                       args.region_config)
    elif args.instance_output:
        with open(args.instance_output, "r") as fin:
            pairs = []
            for line in fin.readlines():
                items = line.split(" ")
                # File lines are "<node_name_tag> <region_number>".
                pairs.append((items[1].strip(), items[0].strip()))
            _terminate_all(pairs, args.region_config)
Loading…
Reference in new issue