[Lesson 19] Complete Notebook

3 years ago · 148c42b95a
parent b93b5335bc
commit 148c42b95a
1 changed files with 226 additions and 2 deletions
--- a/5-Data-Science-In-Cloud/19-tbd/notebook.ipynb
+++ b/5-Data-Science-In-Cloud/19-tbd/notebook.ipynb
@ -34,7 +34,12 @@
   "cell_type": "code",
   "execution_count": null,
   "source": [
-    "from azureml.core import Workspace"
+    "import json\n",
+    "\n",
+    "from azureml.core import Workspace, Experiment\n",
+    "from azureml.core.compute import AmlCompute\n",
+    "from azureml.core.compute_target import ComputeTargetException\n",
+    "from azureml.core.model import InferenceConfig, Model\n",
+    "from azureml.core.webservice import AciWebservice\n",
+    "from azureml.train.automl import AutoMLConfig\n",
+    "from azureml.widgets import RunDetails"
   ],
   "outputs": [],
   "metadata": {}
@ -71,7 +76,226 @@
   "execution_count": null,
   "source": [
    "experiment_name = 'aml-experiment'\n",
-    "experiment = Experiment(ws, experiment_name)"
+    "experiment = Experiment(ws, experiment_name)\n",
+    "experiment"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## Create a Compute Cluster\n",
+    "You will need to create a [compute target](https://docs.microsoft.com/en-us/azure/machine-learning/concept-azure-machine-learning-architecture#compute-target) for your AutoML run."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "aml_name = \"heart-f-cluster\"\n",
+    "try:\n",
+    "    # Reuse the cluster when one with this name already exists in the workspace.\n",
+    "    aml_compute = AmlCompute(ws, aml_name)\n",
+    "    print('Found existing AML compute context.')\n",
+    "except ComputeTargetException:\n",
+    "    # Only a missing compute target should trigger provisioning; any other failure\n",
+    "    # (authentication, networking, interrupts) must surface instead of being swallowed.\n",
+    "    print('Creating new AML compute context.')\n",
+    "    aml_config = AmlCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\", min_nodes=1, max_nodes=3)\n",
+    "    aml_compute = AmlCompute.create(ws, name = aml_name, provisioning_configuration = aml_config)\n",
+    "    aml_compute.wait_for_completion(show_output = True)\n",
+    "\n",
+    "# Look the target up by name so later cells work whether it was found or just created.\n",
+    "cts = ws.compute_targets\n",
+    "compute_target = cts[aml_name]"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## Data\n",
+    "Make sure you have uploaded the dataset to Azure ML and that `key` below matches the name under which the dataset is registered."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "key = 'heart-failure-records'\n",
+    "dataset = ws.datasets[key]\n",
+    "df = dataset.to_pandas_dataframe()\n",
+    "df.describe()"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## AutoML Configuration"
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "automl_settings = {\n",
+    "    \"experiment_timeout_minutes\": 20,\n",
+    "    \"max_concurrent_iterations\": 3,\n",
+    "    \"primary_metric\" : 'AUC_weighted'\n",
+    "}\n",
+    "\n",
+    "automl_config = AutoMLConfig(compute_target=compute_target,\n",
+    "                             task = \"classification\",\n",
+    "                             training_data=dataset,\n",
+    "                             label_column_name=\"DEATH_EVENT\",\n",
+    "                             enable_early_stopping= True,\n",
+    "                             featurization= 'auto',\n",
+    "                             debug_log = \"automl_errors.log\",\n",
+    "                             **automl_settings\n",
+    "                            )"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## AutoML Run"
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "remote_run = experiment.submit(automl_config)"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "RunDetails(remote_run).show()"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## Save the best model"
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "best_run, fitted_model = remote_run.get_output()"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "best_run.get_properties()"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "model_name = best_run.properties['model_name']\n",
+    "script_file_name = 'inference/score.py'\n",
+    "# Download the generated scoring script to script_file_name so the path is\n",
+    "# defined in exactly one place and cannot drift from the download target.\n",
+    "best_run.download_file('outputs/scoring_file_v_1_0_0.py', script_file_name)\n",
+    "description = \"aml heart failure project sdk\"\n",
+    "# Register the best run's model in the workspace under its AutoML-assigned name.\n",
+    "model = best_run.register_model(model_name = model_name,\n",
+    "                                description = description,\n",
+    "                                tags = None)"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## Deploy the Best Model\n",
+    "\n",
+    "Run the following code to deploy the best model. You can see the state of the deployment in the Azure ML portal. This step can take a few minutes."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "inference_config = InferenceConfig(entry_script=script_file_name, environment=best_run.get_environment())\n",
+    "\n",
+    "aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1,\n",
+    "                                               memory_gb = 1,\n",
+    "                                               tags = {'type': \"automl-heart-failure-prediction\"},\n",
+    "                                               description = 'Sample service for AutoML Heart Failure Prediction')\n",
+    "\n",
+    "aci_service_name = 'automl-hf-sdk'\n",
+    "aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n",
+    "aci_service.wait_for_deployment(True)\n",
+    "print(aci_service.state)"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## Consume the Endpoint\n",
+    "You can add more records to the `data` list in the following input sample."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "data = {\n",
+    "    \"data\":\n",
+    "    [\n",
+    "        {\n",
+    "            'age': \"60\",\n",
+    "            'anaemia': \"false\",\n",
+    "            'creatinine_phosphokinase': \"500\",\n",
+    "            'diabetes': \"false\",\n",
+    "            'ejection_fraction': \"38\",\n",
+    "            'high_blood_pressure': \"false\",\n",
+    "            'platelets': \"260000\",\n",
+    "            'serum_creatinine': \"1.40\",\n",
+    "            'serum_sodium': \"137\",\n",
+    "            'sex': \"false\",\n",
+    "            'smoking': \"false\",\n",
+    "            'time': \"130\",\n",
+    "        },\n",
+    "    ],\n",
+    "}\n",
+    "\n",
+    "test_sample = str.encode(json.dumps(data))"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "response = aci_service.run(input_data=test_sample)\n",
+    "response"
   ],
   "outputs": [],
   "metadata": {}