From 2275e32f710d94ff595a0dcba75ca2e07d52ad6e Mon Sep 17 00:00:00 2001 From: Ornella Altunyan <44654695+ornellaalt@users.noreply.github.com> Date: Wed, 23 Jun 2021 16:36:30 -0700 Subject: [PATCH 1/2] Reinforcement 2 pre-quiz --- quiz-app/src/assets/translations/en.json | 30 ++++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/quiz-app/src/assets/translations/en.json b/quiz-app/src/assets/translations/en.json index e992a0c6..62f285ef 100644 --- a/quiz-app/src/assets/translations/en.json +++ b/quiz-app/src/assets/translations/en.json @@ -2587,48 +2587,48 @@ "title": "Reinforcement 2: Pre-Lecture Quiz", "quiz": [ { - "questionText": "q1", + "questionText": "Chess and Go are games with continuous states.", "answerOptions": [ { - "answerText": "a", + "answerText": "true", "isCorrect": "false" }, { - "answerText": "b", + "answerText": "false", "isCorrect": "true" - }, - { - "answerText": "c", - "isCorrect": "false" } ] }, { - "questionText": "q2", + "questionText": "What is the CartPole problem?", "answerOptions": [ { - "answerText": "a", - "isCorrect": "true" + "answerText": "a process for eliminating outliers", + "isCorrect": "false" }, { - "answerText": "b", + "answerText": "a method for optimizing your shopping cart", "isCorrect": "false" + }, + { + "answerText": "a simplified version of balancing", + "isCorrect": "true" } ] }, { - "questionText": "q3", + "questionText": "What tool can we use to play out different scenarios of potential states in a game?", "answerOptions": [ { - "answerText": "a", + "answerText": "guess and check", "isCorrect": "false" }, { - "answerText": "b", + "answerText": "simulation environments", "isCorrect": "true" }, { - "answerText": "c", + "answerText": "state transition testing", "isCorrect": "false" } ] From 724652c6883f6eea1caad89d00c8a5a781819aca Mon Sep 17 00:00:00 2001 From: Ornella Altunyan <44654695+ornellaalt@users.noreply.github.com> Date: Wed, 23 Jun 2021 16:48:12 -0700 Subject: [PATCH 2/2] 
Reinforcement 2 post-quiz --- 8-Reinforcement/2-Gym/README.md | 2 +- quiz-app/src/assets/translations/en.json | 26 ++++++++++++++---------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/8-Reinforcement/2-Gym/README.md b/8-Reinforcement/2-Gym/README.md index ae53c392..d2ec626c 100644 --- a/8-Reinforcement/2-Gym/README.md +++ b/8-Reinforcement/2-Gym/README.md @@ -17,7 +17,7 @@ We will use a simplified version of balancing known as a **CartPole** problem. I In this lesson, we will be using a library called **OpenAI Gym** to simulate different **environments**. You can run this lesson's code locally (eg. from Visual Studio Code), in which case the simulation will open in a new window. When running the code online, you may need to make some tweaks to the code, as described [here](https://towardsdatascience.com/rendering-openai-gym-envs-on-binder-and-google-colab-536f99391cc7). ## OpenAI Gym -In the previous lesson, the rules of the game and the state were given by the `Board` class which we defined ourselves. Here we will use a special **sumulation environment**, which will simulate the physics behind the balancing pole. One of the most popular simulation environments for training reinforcement learning algorithms is called a [Gym](https://gym.openai.com/), which is maintained by [OpenAI](https://openai.com/). By using this gym we can create difference **environments** from a cartpole simulation to Atari games. +In the previous lesson, the rules of the game and the state were given by the `Board` class which we defined ourselves. Here we will use a special **simulation environment**, which will simulate the physics behind the balancing pole. One of the most popular simulation environments for training reinforcement learning algorithms is called a [Gym](https://gym.openai.com/), which is maintained by [OpenAI](https://openai.com/). By using this gym we can create different **environments** from a cartpole simulation to Atari games. 
> **Note**: You can see other environments available from OpenAI Gym [here](https://gym.openai.com/envs/#classic_control). diff --git a/quiz-app/src/assets/translations/en.json b/quiz-app/src/assets/translations/en.json index 62f285ef..d0acc6ff 100644 --- a/quiz-app/src/assets/translations/en.json +++ b/quiz-app/src/assets/translations/en.json @@ -2640,48 +2640,52 @@ "title": "Reinforcement 2: Post-Lecture Quiz", "quiz": [ { - "questionText": "q1", + "questionText": "Where do we define all possible actions in an environment?", "answerOptions": [ { - "answerText": "a", + "answerText": "methods", "isCorrect": "false" }, { - "answerText": "b", + "answerText": "action space", "isCorrect": "true" }, { - "answerText": "c", + "answerText": "action list", "isCorrect": "false" } ] }, { - "questionText": "q2", + "questionText": "What pair did we use as the dictionary key-value?", "answerOptions": [ { - "answerText": "a", + "answerText": "(state, action) as the key, Q-Table entry as the value", "isCorrect": "true" }, { - "answerText": "b", + "answerText": "state as the key, action as the value", + "isCorrect": "false" + }, + { + "answerText": "the value of the qvalues function as the key, action as the value", "isCorrect": "false" } ] }, { - "questionText": "q3", + "questionText": "What are the hyperparameters we used during Q-Learning?", "answerOptions": [ { - "answerText": "a", + "answerText": "q-table value, current reward, random action", "isCorrect": "false" }, { - "answerText": "b", + "answerText": "learning rate, discount factor, exploration/exploitation factor", "isCorrect": "true" }, { - "answerText": "c", + "answerText": "cumulative rewards, learning rate, exploration factor", "isCorrect": "false" } ]