pull/46/head
Jen Looper 3 years ago
commit 750c7077e9

@@ -17,7 +17,7 @@ We will use a simplified version of balancing known as a **CartPole** problem. I
In this lesson, we will be using a library called **OpenAI Gym** to simulate different **environments**. You can run this lesson's code locally (e.g. from Visual Studio Code), in which case the simulation will open in a new window. When running the code online, you may need to make some tweaks to the code, as described [here](https://towardsdatascience.com/rendering-openai-gym-envs-on-binder-and-google-colab-536f99391cc7).
## OpenAI Gym
In the previous lesson, the rules of the game and the state were given by the `Board` class which we defined ourselves. Here we will use a special **sumulation environment**, which will simulate the physics behind the balancing pole. One of the most popular simulation environments for training reinforcement learning algorithms is called a [Gym](https://gym.openai.com/), which is maintained by [OpenAI](https://openai.com/). By using this gym we can create difference **environments** from a cartpole simulation to Atari games.
In the previous lesson, the rules of the game and the state were given by the `Board` class which we defined ourselves. Here we will use a special **simulation environment**, which will simulate the physics behind the balancing pole. One of the most popular simulation environments for training reinforcement learning algorithms is called a [Gym](https://gym.openai.com/), which is maintained by [OpenAI](https://openai.com/). By using this gym we can create different **environments**, from a cartpole simulation to Atari games.
> **Note**: You can see other environments available from OpenAI Gym [here](https://gym.openai.com/envs/#classic_control).
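As a rough sketch of how such an environment is used (assuming the classic `gym` API, where `reset()` returns only the observation and `step()` returns four values; newer `gymnasium` releases behave differently), a single CartPole episode driven by random actions might look like this:

```python
import gym

# Create the CartPole environment ("CartPole-v1" is a standard Gym environment id)
env = gym.make("CartPole-v1")

obs = env.reset()          # classic gym API: reset() returns only the observation
total_reward, done = 0, False
while not done:
    action = env.action_space.sample()            # sample a random action from the action space
    obs, reward, done, info = env.step(action)    # classic gym API: step() returns four values
    total_reward += reward
env.close()
print(f"Random policy finished with total reward {total_reward}")
```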

@@ -2587,48 +2587,48 @@
"title": "Reinforcement 2: Pre-Lecture Quiz",
"quiz": [
{
"questionText": "q1",
"questionText": "Chess and Go are games with continuous states.",
"answerOptions": [
{
"answerText": "a",
"answerText": "true",
"isCorrect": "false"
},
{
"answerText": "b",
"answerText": "false",
"isCorrect": "true"
},
{
"answerText": "c",
"isCorrect": "false"
}
]
},
{
"questionText": "q2",
"questionText": "What is the CartPole problem?",
"answerOptions": [
{
"answerText": "a",
"isCorrect": "true"
"answerText": "a process for eliminating outliers",
"isCorrect": "false"
},
{
"answerText": "b",
"answerText": "a method for optimizing your shopping cart",
"isCorrect": "false"
},
{
"answerText": "a simplified version of balancing",
"isCorrect": "true"
}
]
},
{
"questionText": "q3",
"questionText": "What tool can we use to play out different scenarios of potential states in a game?",
"answerOptions": [
{
"answerText": "a",
"answerText": "guess and check",
"isCorrect": "false"
},
{
"answerText": "b",
"answerText": "simulation environments",
"isCorrect": "true"
},
{
"answerText": "c",
"answerText": "state transition testing",
"isCorrect": "false"
}
]
@@ -2640,48 +2640,52 @@
"title": "Reinforcement 2: Post-Lecture Quiz",
"quiz": [
{
"questionText": "q1",
"questionText": "Where do we define all possible actions in an environment?",
"answerOptions": [
{
"answerText": "a",
"answerText": "methods",
"isCorrect": "false"
},
{
"answerText": "b",
"answerText": "action space",
"isCorrect": "true"
},
{
"answerText": "c",
"answerText": "action list",
"isCorrect": "false"
}
]
},
{
"questionText": "q2",
"questionText": "What pair did we use as the dictionary key-value?",
"answerOptions": [
{
"answerText": "a",
"answerText": "(state, action) as the key, Q-Table entry as the value",
"isCorrect": "true"
},
{
"answerText": "b",
"answerText": "state as the key, action as the value",
"isCorrect": "false"
},
{
"answerText": "the value of the qvalues function as the key, action as the value",
"isCorrect": "false"
}
]
},
{
"questionText": "q3",
"questionText": "What are the hyperparameters we used during Q-Learning?",
"answerOptions": [
{
"answerText": "a",
"answerText": "q-table value, current reward, random action",
"isCorrect": "false"
},
{
"answerText": "b",
"answerText": "learning rate, discount factor, exploration/exploitation factor",
"isCorrect": "true"
},
{
"answerText": "c",
"answerText": "cumulative rewards, learning rate, exploration factor",
"isCorrect": "false"
}
]
