From 148c42b95a6e11fd158a07a8b69f4bfa3b716f74 Mon Sep 17 00:00:00 2001 From: Amagash Date: Tue, 31 Aug 2021 18:10:50 +0200 Subject: [PATCH] [Lesson 19] Complete Notebook --- 5-Data-Science-In-Cloud/19-tbd/notebook.ipynb | 228 +++++++++++++++++- 1 file changed, 226 insertions(+), 2 deletions(-) diff --git a/5-Data-Science-In-Cloud/19-tbd/notebook.ipynb b/5-Data-Science-In-Cloud/19-tbd/notebook.ipynb index ac317cc..8a068aa 100644 --- a/5-Data-Science-In-Cloud/19-tbd/notebook.ipynb +++ b/5-Data-Science-In-Cloud/19-tbd/notebook.ipynb @@ -34,7 +34,12 @@ "cell_type": "code", "execution_count": null, "source": [ - "from azureml.core import Workspace" + "from azureml.core import Workspace, Experiment\n", + "from azureml.core.compute import AmlCompute\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.widgets import RunDetails\n", + "from azureml.core.model import InferenceConfig, Model\n", + "from azureml.core.webservice import AciWebservice" ], "outputs": [], "metadata": {} @@ -71,7 +76,226 @@ "execution_count": null, "source": [ "experiment_name = 'aml-experiment'\n", - "experiment = Experiment(ws, experiment_name)" + "experiment = Experiment(ws, experiment_name)\n", + "experiment" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Create a Compute Cluster\n", + "You will need to create a [compute target](https://docs.microsoft.com/en-us/azure/machine-learning/concept-azure-machine-learning-architecture#compute-target) for your AutoML run." 
+ ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "aml_name = \"heart-f-cluster\"\n", + "try:\n", + " aml_compute = AmlCompute(ws, aml_name)\n", + " print('Found existing AML compute context.')\n", + "except Exception: # lookup failed: provision below (a bare except would also trap KeyboardInterrupt)\n", + " print('Creating new AML compute context.')\n", + " aml_config = AmlCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\", min_nodes=1, max_nodes=3)\n", + " aml_compute = AmlCompute.create(ws, name = aml_name, provisioning_configuration = aml_config)\n", + " aml_compute.wait_for_completion(show_output = True)\n", + "\n", + "cts = ws.compute_targets\n", + "compute_target = cts[aml_name]" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Data\n", + "Make sure you have uploaded the dataset to Azure ML and that the key is the same name as the dataset." + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "key = 'heart-failure-records'\n", + "dataset = ws.datasets[key]\n", + "df = dataset.to_pandas_dataframe()\n", + "df.describe()" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## AutoML Configuration" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "automl_settings = {\n", + " \"experiment_timeout_minutes\": 20,\n", + " \"max_concurrent_iterations\": 3,\n", + " \"primary_metric\" : 'AUC_weighted'\n", + "}\n", + "\n", + "automl_config = AutoMLConfig(compute_target=compute_target,\n", + " task = \"classification\",\n", + " training_data=dataset,\n", + " label_column_name=\"DEATH_EVENT\",\n", + " enable_early_stopping= True,\n", + " featurization= 'auto',\n", + " debug_log = \"automl_errors.log\",\n", + " **automl_settings\n", + " )" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## AutoML Run" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, 
"source": [ + "remote_run = experiment.submit(automl_config)" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "RunDetails(remote_run).show()" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Save the best model" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "best_run, fitted_model = remote_run.get_output()" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "best_run.get_properties()" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "model_name = best_run.properties['model_name']\n", + "script_file_name = 'inference/score.py'\n", + "best_run.download_file('outputs/scoring_file_v_1_0_0.py', 'inference/score.py')\n", + "description = \"aml heart failure project sdk\"\n", + "model = best_run.register_model(model_name = model_name,\n", + " description = description,\n", + " tags = None)" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Deploy the Best Model\n", + "\n", + "Run the following code to deploy the best model. You can see the state of the deployment in the Azure ML portal. This step can take a few minutes." 
+ ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "inference_config = InferenceConfig(entry_script=script_file_name, environment=best_run.get_environment())\n", + "\n", + "aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1,\n", + " memory_gb = 1,\n", + " tags = {'type': \"automl-heart-failure-prediction\"},\n", + " description = 'Sample service for AutoML Heart Failure Prediction')\n", + "\n", + "aci_service_name = 'automl-hf-sdk'\n", + "aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n", + "aci_service.wait_for_deployment(True)\n", + "print(aci_service.state)" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "## Consume the Endpoint\n", + "You can add inputs to the following input sample. " + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "data = {\n", + " \"data\":\n", + " [\n", + " {\n", + " 'age': \"60\",\n", + " 'anaemia': \"false\",\n", + " 'creatinine_phosphokinase': \"500\",\n", + " 'diabetes': \"false\",\n", + " 'ejection_fraction': \"38\",\n", + " 'high_blood_pressure': \"false\",\n", + " 'platelets': \"260000\",\n", + " 'serum_creatinine': \"1.40\",\n", + " 'serum_sodium': \"137\",\n", + " 'sex': \"false\",\n", + " 'smoking': \"false\",\n", + " 'time': \"130\",\n", + " },\n", + " ],\n", + "}\n", + "import json # required by json.dumps on the next line\n", + "test_sample = str.encode(json.dumps(data))" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "response = aci_service.run(input_data=test_sample)\n", + "response" ], "outputs": [], "metadata": {}